From dbab65be6bdea68b6a201d03334b44a02860dd2c Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 11 Apr 2018 18:13:30 -0600 Subject: [PATCH 01/88] usbip: usbip_host: refine probe and disconnect debug msgs to be useful commit 28b68acc4a88dcf91fd1dcf2577371dc9bf574cc upstream. Refine probe and disconnect debug msgs to be useful and say what is in progress. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 3550224f4d69..70973ca36a4d 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -316,7 +316,7 @@ static int stub_probe(struct usb_device *udev) struct bus_id_priv *busid_priv; int rc; - dev_dbg(&udev->dev, "Enter\n"); + dev_dbg(&udev->dev, "Enter probe\n"); /* check we should claim or not by busid_table */ busid_priv = get_busid_priv(udev_busid); @@ -418,7 +418,7 @@ static void stub_disconnect(struct usb_device *udev) struct bus_id_priv *busid_priv; int rc; - dev_dbg(&udev->dev, "Enter\n"); + dev_dbg(&udev->dev, "Enter disconnect\n"); busid_priv = get_busid_priv(udev_busid); if (!busid_priv) { From 58c9c70cb7e10599f44a996cc4b481fc5c02ea4f Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Mon, 30 Apr 2018 16:17:19 -0600 Subject: [PATCH 02/88] usbip: usbip_host: delete device from busid_table after rebind commit 1e180f167d4e413afccbbb4a421b48b2de832549 upstream. Device is left in the busid_table after unbind and rebind. Rebind initiates usb bus scan and the original driver claims the device. After rescan the device should be deleted from the busid_table as it no longer belongs to usbip_host. Fix it to delete the device after device_attach() succeeds. Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index f761e02e75c9..c289f0593bb8 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -201,6 +201,9 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, if (!bid) return -ENODEV; + /* mark the device for deletion so probe ignores it during rescan */ + bid->status = STUB_BUSID_OTHER; + /* device_attach() callers should hold parent lock for USB */ if (bid->udev->dev.parent) device_lock(bid->udev->dev.parent); @@ -212,6 +215,9 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, return ret; } + /* delete device from busid_table */ + del_match_busid((char *) buf); + return count; } From 59ad4f5342dadadbcf9fd67f29fd6a8ed70ab125 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Mon, 30 Apr 2018 16:17:20 -0600 Subject: [PATCH 03/88] usbip: usbip_host: run rebind from exit when module is removed commit 7510df3f29d44685bab7b1918b61a8ccd57126a9 upstream. After removing usbip_host module, devices it releases are left without a driver. For example, when a keyboard or a mass storage device are bound to usbip_host when it is removed, these devices are no longer bound to any driver. Fix it to run device_attach() from the module exit routine to restore the devices to their original drivers. This includes cleanup changes and moving device_attach() code to a common routine to be called from rebind_store() and usbip_host_exit(). Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 6 +--- drivers/usb/usbip/stub_main.c | 60 +++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 70973ca36a4d..12d6d95a142b 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -462,12 +462,8 @@ static void stub_disconnect(struct usb_device *udev) busid_priv->sdev = NULL; stub_device_free(sdev); - if (busid_priv->status == STUB_BUSID_ALLOC) { + if (busid_priv->status == STUB_BUSID_ALLOC) busid_priv->status = STUB_BUSID_ADDED; - } else { - busid_priv->status = STUB_BUSID_OTHER; - del_match_busid((char *)udev_busid); - } } #ifdef CONFIG_PM diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index c289f0593bb8..5a4f5d9d3ea1 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -28,6 +28,7 @@ #define DRIVER_DESC "USB/IP Host Driver" struct kmem_cache *stub_priv_cache; + /* * busid_tables defines matching busids that usbip can grab. A user can change * dynamically what device is locally used and what device is exported to a @@ -184,6 +185,51 @@ static ssize_t store_match_busid(struct device_driver *dev, const char *buf, static DRIVER_ATTR(match_busid, S_IRUSR | S_IWUSR, show_match_busid, store_match_busid); +static int do_rebind(char *busid, struct bus_id_priv *busid_priv) +{ + int ret; + + /* device_attach() callers should hold parent lock for USB */ + if (busid_priv->udev->dev.parent) + device_lock(busid_priv->udev->dev.parent); + ret = device_attach(&busid_priv->udev->dev); + if (busid_priv->udev->dev.parent) + device_unlock(busid_priv->udev->dev.parent); + if (ret < 0) { + dev_err(&busid_priv->udev->dev, "rebind failed\n"); + return ret; + } + return 0; +} + +static void stub_device_rebind(void) +{ +#if IS_MODULE(CONFIG_USBIP_HOST) + struct bus_id_priv *busid_priv; + int i; + + /* update status to STUB_BUSID_OTHER so probe ignores the device */ + spin_lock(&busid_table_lock); + for (i = 0; i < MAX_BUSID; i++) { + if (busid_table[i].name[0] && + busid_table[i].shutdown_busid) { + busid_priv = &(busid_table[i]); + busid_priv->status = STUB_BUSID_OTHER; + } + } + spin_unlock(&busid_table_lock); + + /* now run rebind */ + for (i = 0; i < MAX_BUSID; i++) { + if (busid_table[i].name[0] && + busid_table[i].shutdown_busid) { + busid_priv = &(busid_table[i]); + do_rebind(busid_table[i].name, busid_priv); + } + } +#endif +} + static ssize_t rebind_store(struct device_driver *dev, const char *buf, size_t count) { @@ -204,16 +250,9 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, /* mark the device for deletion so probe ignores it during rescan */ bid->status = STUB_BUSID_OTHER; - /* device_attach() callers should hold parent lock for USB */ - if (bid->udev->dev.parent) - device_lock(bid->udev->dev.parent); - ret = device_attach(&bid->udev->dev); - if (bid->udev->dev.parent) - device_unlock(bid->udev->dev.parent); - if (ret < 0) { - dev_err(&bid->udev->dev, "rebind failed\n"); + ret = do_rebind((char *) buf, bid); + if (ret < 0) return ret; - } /* delete device from busid_table */ del_match_busid((char *) buf); @@ -339,6 +378,9 @@ static void __exit usbip_host_exit(void) */ usb_deregister_device_driver(&stub_driver); + /* initiate scan to attach devices */ + stub_device_rebind(); + kmem_cache_destroy(stub_priv_cache); } From f2a6d5f19450086e5cbdac7168d3fc75af32becf Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Mon, 14 May 2018 20:49:58 -0600 Subject: [PATCH 04/88] usbip: usbip_host: fix NULL-ptr deref and use-after-free errors commit 22076557b07c12086eeb16b8ce2b0b735f7a27e7 upstream. usbip_host updates device status without holding lock from stub probe, disconnect and rebind code paths. When multiple requests to import a device are received, these unprotected code paths step all over each other and drive fails with NULL-ptr deref and use-after-free errors. The driver uses a table lock to protect the busid array for adding and deleting busids to the table. However, the probe, disconnect and rebind paths get the busid table entry and update the status without holding the busid table lock. Add a new finer grain lock to protect the busid entry. This new lock will be held to search and update the busid entry fields from get_busid_idx(), add_match_busid() and del_match_busid(). match_busid_show() does the same to access the busid entry fields. get_busid_priv() changed to return the pointer to the busid entry holding the busid lock. stub_probe(), stub_disconnect() and stub_device_rebind() call put_busid_priv() to release the busid lock before returning. This changes fixes the unprotected code paths eliminating the race conditions in updating the busid entries. Reported-by: Jakub Jirasek Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub.h | 2 ++ drivers/usb/usbip/stub_dev.c | 33 ++++++++++++++++++++--------- drivers/usb/usbip/stub_main.c | 40 ++++++++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h index 910f027773aa..84c0599b45b7 100644 --- a/drivers/usb/usbip/stub.h +++ b/drivers/usb/usbip/stub.h @@ -87,6 +87,7 @@ struct bus_id_priv { struct stub_device *sdev; struct usb_device *udev; char shutdown_busid; + spinlock_t busid_lock; }; /* stub_priv is allocated from stub_priv_cache */ @@ -97,6 +98,7 @@ extern struct usb_device_driver stub_driver; /* stub_main.c */ struct bus_id_priv *get_busid_priv(const char *busid); +void put_busid_priv(struct bus_id_priv *bid); int del_match_busid(char *busid); void stub_device_cleanup_urbs(struct stub_device *sdev); diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 12d6d95a142b..8e629b6a6f3f 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -314,7 +314,7 @@ static int stub_probe(struct usb_device *udev) struct stub_device *sdev = NULL; const char *udev_busid = dev_name(&udev->dev); struct bus_id_priv *busid_priv; - int rc; + int rc = 0; dev_dbg(&udev->dev, "Enter probe\n"); @@ -331,13 +331,15 @@ static int stub_probe(struct usb_device *udev) * other matched drivers by the driver core. * See driver_probe_device() in driver/base/dd.c */ - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) { dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n", udev_busid); - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } if (!strcmp(udev->bus->bus_name, "vhci_hcd")) { @@ -345,13 +347,16 @@ static int stub_probe(struct usb_device *udev) "%s is attached on vhci_hcd... skip!\n", udev_busid); - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } /* ok, this is my device */ sdev = stub_device_alloc(udev); - if (!sdev) - return -ENOMEM; + if (!sdev) { + rc = -ENOMEM; + goto call_put_busid_priv; + } dev_info(&udev->dev, "usbip-host: register new device (bus %u dev %u)\n", @@ -383,7 +388,9 @@ static int stub_probe(struct usb_device *udev) } busid_priv->status = STUB_BUSID_ALLOC; - return 0; + rc = 0; + goto call_put_busid_priv; + err_files: usb_hub_release_port(udev->parent, udev->portnum, (struct usb_dev_state *) udev); @@ -393,6 +400,9 @@ err_port: busid_priv->sdev = NULL; stub_device_free(sdev); + +call_put_busid_priv: + put_busid_priv(busid_priv); return rc; } @@ -431,7 +441,7 @@ static void stub_disconnect(struct usb_device *udev) /* get stub_device */ if (!sdev) { dev_err(&udev->dev, "could not get device"); - return; + goto call_put_busid_priv; } dev_set_drvdata(&udev->dev, NULL); @@ -446,12 +456,12 @@ static void stub_disconnect(struct usb_device *udev) (struct usb_dev_state *) udev); if (rc) { dev_dbg(&udev->dev, "unable to release port\n"); - return; + goto call_put_busid_priv; } /* If usb reset is called from event handler */ if (usbip_in_eh(current)) - return; + goto call_put_busid_priv; /* shutdown the current connection */ shutdown_busid(busid_priv); @@ -464,6 +474,9 @@ static void stub_disconnect(struct usb_device *udev) if (busid_priv->status == STUB_BUSID_ALLOC) busid_priv->status = STUB_BUSID_ADDED; + +call_put_busid_priv: + put_busid_priv(busid_priv); } #ifdef CONFIG_PM diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 5a4f5d9d3ea1..82df6ea4d1a6 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -40,6 +40,8 @@ static spinlock_t busid_table_lock; static void init_busid_table(void) { + int i; + /* * This also sets the bus_table[i].status to * STUB_BUSID_OTHER, which is 0. @@ -47,6 +49,9 @@ static void init_busid_table(void) memset(busid_table, 0, sizeof(busid_table)); spin_lock_init(&busid_table_lock); + + for (i = 0; i < MAX_BUSID; i++) + spin_lock_init(&busid_table[i].busid_lock); } /* @@ -58,15 +63,20 @@ static int get_busid_idx(const char *busid) int i; int idx = -1; - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (busid_table[i].name[0]) if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) { idx = i; + spin_unlock(&busid_table[i].busid_lock); break; } + spin_unlock(&busid_table[i].busid_lock); + } return idx; } +/* Returns holding busid_lock. Should call put_busid_priv() to unlock */ struct bus_id_priv *get_busid_priv(const char *busid) { int idx; @@ -74,13 +84,21 @@ struct bus_id_priv *get_busid_priv(const char *busid) spin_lock(&busid_table_lock); idx = get_busid_idx(busid); - if (idx >= 0) + if (idx >= 0) { bid = &(busid_table[idx]); + /* get busid_lock before returning */ + spin_lock(&bid->busid_lock); + } spin_unlock(&busid_table_lock); return bid; } +void put_busid_priv(struct bus_id_priv *bid) +{ + spin_unlock(&bid->busid_lock); +} + static int add_match_busid(char *busid) { int i; @@ -93,15 +111,19 @@ static int add_match_busid(char *busid) goto out; } - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (!busid_table[i].name[0]) { strlcpy(busid_table[i].name, busid, BUSID_SIZE); if ((busid_table[i].status != STUB_BUSID_ALLOC) && (busid_table[i].status != STUB_BUSID_REMOV)) busid_table[i].status = STUB_BUSID_ADDED; ret = 0; + spin_unlock(&busid_table[i].busid_lock); break; } + spin_unlock(&busid_table[i].busid_lock); + } out: spin_unlock(&busid_table_lock); @@ -122,6 +144,8 @@ int del_match_busid(char *busid) /* found */ ret = 0; + spin_lock(&busid_table[idx].busid_lock); + if (busid_table[idx].status == STUB_BUSID_OTHER) memset(busid_table[idx].name, 0, BUSID_SIZE); @@ -129,6 +153,7 @@ int del_match_busid(char *busid) (busid_table[idx].status != STUB_BUSID_ADDED)) busid_table[idx].status = STUB_BUSID_REMOV; + spin_unlock(&busid_table[idx].busid_lock); out: spin_unlock(&busid_table_lock); @@ -141,9 +166,12 @@ static ssize_t show_match_busid(struct device_driver *drv, char *buf) char *out = buf; spin_lock(&busid_table_lock); - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (busid_table[i].name[0]) out += sprintf(out, "%s ", busid_table[i].name); + spin_unlock(&busid_table[i].busid_lock); + } spin_unlock(&busid_table_lock); out += sprintf(out, "\n"); @@ -219,7 +247,7 @@ static void stub_device_rebind(void) } spin_unlock(&busid_table_lock); - /* now run rebind */ + /* now run rebind - no need to hold locks. driver files are removed */ for (i = 0; i < MAX_BUSID; i++) { if (busid_table[i].name[0] && busid_table[i].shutdown_busid) { @@ -249,6 +277,8 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, /* mark the device for deletion so probe ignores it during rescan */ bid->status = STUB_BUSID_OTHER; + /* release the busid lock */ + put_busid_priv(bid); ret = do_rebind((char *) buf, bid); if (ret < 0) From 0471d407998b58d1f1cbb4f594fc63f9bf0ec7bb Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 15 May 2018 17:57:23 -0600 Subject: [PATCH 05/88] usbip: usbip_host: fix bad unlock balance during stub_probe() commit c171654caa875919be3c533d3518da8be5be966e upstream. stub_probe() calls put_busid_priv() in an error path when device isn't found in the busid_table. Fix it by making put_busid_priv() safe to be called with null struct bus_id_priv pointer. This problem happens when "usbip bind" is run without loading usbip_host driver and then running modprobe. The first failed bind attempt unbinds the device from the original driver and when usbip_host is modprobed, stub_probe() runs and doesn't find the device in its busid table and calls put_busid_priv(0 with null bus_id_priv pointer. usbip-host 3-10.2: 3-10.2 is not in match_busid table... skip! [ 367.359679] ===================================== [ 367.359681] WARNING: bad unlock balance detected! [ 367.359683] 4.17.0-rc4+ #5 Not tainted [ 367.359685] ------------------------------------- [ 367.359688] modprobe/2768 is trying to release lock ( [ 367.359689] ================================================================== [ 367.359696] BUG: KASAN: null-ptr-deref in print_unlock_imbalance_bug+0x99/0x110 [ 367.359699] Read of size 8 at addr 0000000000000058 by task modprobe/2768 [ 367.359705] CPU: 4 PID: 2768 Comm: modprobe Not tainted 4.17.0-rc4+ #5 Fixes: 22076557b07c ("usbip: usbip_host: fix NULL-ptr deref and use-after-free errors") in usb-linus Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 82df6ea4d1a6..fa90496ca7a8 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -96,7 +96,8 @@ struct bus_id_priv *get_busid_priv(const char *busid) void put_busid_priv(struct bus_id_priv *bid) { - spin_unlock(&bid->busid_lock); + if (bid) + spin_unlock(&bid->busid_lock); } static int add_match_busid(char *busid) From 639a74bf5f4da1c64884ff157ec62fed2798d5d0 Mon Sep 17 00:00:00 2001 From: Federico Cuello Date: Wed, 9 May 2018 00:13:38 +0200 Subject: [PATCH 06/88] ALSA: usb: mixer: volume quirk for CM102-A+/102S+ commit 21493316a3c4598f308d5a9fa31cc74639c4caff upstream. Currently it's not possible to set volume lower than 26% (it just mutes). Also fixes this warning: Warning! Unlikely big volume range (=9472), cval->res is probably wrong. [13] FU [PCM Playback Volume] ch = 2, val = -9473/-1/1 , and volume works fine for full range. Signed-off-by: Federico Cuello Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index dedf8eb4570e..db8404e31fae 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -905,6 +905,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x0d8c, 0x0103): + if (!strcmp(kctl->id.name, "PCM Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for CM102-A+/102S+\n"); + cval->min = -256; + } + break; + case USB_ID(0x0471, 0x0101): case USB_ID(0x0471, 0x0104): case USB_ID(0x0471, 0x0105): From e303276bbd9e2c603eee7a5ca14362493728d589 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 May 2018 09:27:46 +0200 Subject: [PATCH 07/88] ALSA: hda: Add Lenovo C50 All in one to the power_save blacklist commit c8beccc19b92f5172994c0732db689c08f4f98e5 upstream. Power-saving is causing loud plops on the Lenovo C50 All in one, add it to the blacklist. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1572975 Signed-off-by: Hans de Goede Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 7d3f88d90eec..4e9112001306 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2061,6 +2061,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ + SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), {} From c5d8237ef6067635ed5cb6b787540c52afe6bc8f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 5 May 2018 13:38:03 -0500 Subject: [PATCH 08/88] ALSA: control: fix a redundant-copy issue commit 3f12888dfae2a48741c4caa9214885b3aaf350f9 upstream. In snd_ctl_elem_add_compat(), the fields of the struct 'data' need to be copied from the corresponding fields of the struct 'data32' in userspace. This is achieved by invoking copy_from_user() and get_user() functions. The problem here is that the 'type' field is copied twice. One is by copy_from_user() and one is by get_user(). Given that the 'type' field is not used between the two copies, the second copy is *completely* redundant and should be removed for better performance and cleanup. Also, these two copies can cause inconsistent data: as the struct 'data32' resides in userspace and a malicious userspace process can race to change the 'type' field between the two copies to cause inconsistent data. Depending on how the data is used in the future, such an inconsistency may cause potential security risks. For above reasons, we should take out the second copy. Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control_compat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index 1fa70766ffab..84ee29c3b1a0 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -400,8 +400,7 @@ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, if (copy_from_user(&data->id, &data32->id, sizeof(data->id)) || copy_from_user(&data->type, &data32->type, 3 * sizeof(u32))) goto error; - if (get_user(data->owner, &data32->owner) || - get_user(data->type, &data32->type)) + if (get_user(data->owner, &data32->owner)) goto error; switch (data->type) { case SNDRV_CTL_ELEM_TYPE_BOOLEAN: From a436539bc16fdc6cb69cfa069ca0388df8d53d4b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Apr 2018 19:53:32 +0300 Subject: [PATCH 09/88] spi: pxa2xx: Allow 64-bit DMA commit efc4a13724b852ddaa3358402a8dec024ffbcb17 upstream. Currently the 32-bit device address only is supported for DMA. However, starting from Intel Sunrisepoint PCH the DMA address of the device FIFO can be 64-bit. Change the respective variable to be compatible with DMA engine expectations, i.e. to phys_addr_t. Fixes: 34cadd9c1bcb ("spi: pxa2xx: Add support for Intel Sunrisepoint") Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index ce31b8199bb3..b8e004d1467b 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -38,7 +38,7 @@ struct driver_data { /* SSP register addresses */ void __iomem *ioaddr; - u32 ssdr_physical; + phys_addr_t ssdr_physical; /* SSP masks*/ u32 dma_cr1; From 1c384327383de13223186ec138c57cb49fa1efea Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Thu, 26 Apr 2018 14:48:00 -0400 Subject: [PATCH 10/88] spi: bcm-qspi: Avoid setting MSPI_CDRAM_PCS for spi-nor master commit 5eb9a07a4ae1008b67d8bcd47bddb3dae97456b7 upstream. Added fix for probing of spi-nor device non-zero chip selects. Set MSPI_CDRAM_PCS (peripheral chip select) with spi master for MSPI controller and not for MSPI/BSPI spi-nor master controller. Ensure setting of cs bit in chip select register on chip select change. Fixes: fa236a7ef24048 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Kamal Dasu Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm-qspi.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 7d629b4e1ecc..77966e291ae6 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -543,16 +543,19 @@ static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi) static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) { - u32 data = 0; + u32 rd = 0; + u32 wr = 0; - if (qspi->curr_cs == cs) - return; if (qspi->base[CHIP_SELECT]) { - data = bcm_qspi_read(qspi, CHIP_SELECT, 0); - data = (data & ~0xff) | (1 << cs); - bcm_qspi_write(qspi, CHIP_SELECT, 0, data); + rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); + wr = (rd & ~0xff) | (1 << cs); + if (rd == wr) + return; + bcm_qspi_write(qspi, CHIP_SELECT, 0, wr); usleep_range(10, 20); } + + dev_dbg(&qspi->pdev->dev, "using cs:%d\n", cs); qspi->curr_cs = cs; } @@ -770,8 +773,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi) dev_dbg(&qspi->pdev->dev, "WR %04x\n", val); } mspi_cdram = MSPI_CDRAM_CONT_BIT; - mspi_cdram |= (~(1 << spi->chip_select) & - MSPI_CDRAM_PCS); + + if (has_bspi(qspi)) + mspi_cdram &= ~1; + else + mspi_cdram |= (~(1 << spi->chip_select) & + MSPI_CDRAM_PCS); + mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 : MSPI_CDRAM_BITSE_BIT); From ad04996f0bb0a2bfc8468c04dacc40aed76de8ae Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Thu, 26 Apr 2018 14:48:01 -0400 Subject: [PATCH 11/88] spi: bcm-qspi: Always read and set BSPI_MAST_N_BOOT_CTRL commit 602805fb618b018b7a41fbb3f93c1992b078b1ae upstream. Always confirm the BSPI_MAST_N_BOOT_CTRL bit when enabling or disabling BSPI transfers. Fixes: 4e3b2d236fe00 ("spi: bcm-qspi: Add BSPI spi-nor flash controller driver") Signed-off-by: Kamal Dasu Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 77966e291ae6..adc3f56d4773 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -514,7 +514,7 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi, static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi) { - if (!has_bspi(qspi) || (qspi->bspi_enabled)) + if (!has_bspi(qspi)) return; qspi->bspi_enabled = 1; @@ -529,7 +529,7 @@ static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi) static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi) { - if (!has_bspi(qspi) || (!qspi->bspi_enabled)) + if (!has_bspi(qspi)) return; qspi->bspi_enabled = 0; From 9488d11728a6d945ce589cac6b6760cdb361e9c6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 May 2018 15:20:14 +0100 Subject: [PATCH 12/88] KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock commit bf308242ab98b5d1648c3663e753556bef9bec01 upstream. kvm_read_guest() will eventually look up in kvm_memslots(), which requires either to hold the kvm->slots_lock or to be inside a kvm->srcu critical section. In contrast to x86 and s390 we don't take the SRCU lock on every guest exit, so we have to do it individually for each kvm_read_guest() call. Provide a wrapper which does that and use that everywhere. Note that ending the SRCU critical section before returning from the kvm_read_guest() wrapper is safe, because the data has been *copied*, so we don't need to rely on valid references to the memslot anymore. Cc: Stable # 4.8+ Reported-by: Jan Glauber Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 16 ++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 16 ++++++++++++++++ virt/kvm/arm/vgic/vgic-its.c | 15 ++++++++------- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index d10e36235438..7f66b1b3aca1 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -223,6 +223,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { return kvm_ksym_ref(__kvm_hyp_vector); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 80bf33715ecb..eac73a640ea7 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -313,6 +313,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 31f562507915..1ebbf233de9a 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -208,8 +208,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, u8 prop; int ret; - ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, - &prop, 1); + ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); if (ret) return ret; @@ -339,8 +339,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) * this very same byte in the last iteration. Reuse that. */ if (byte_offset != last_byte_offset) { - ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset, - &pendmask, 1); + ret = kvm_read_guest_lock(vcpu->kvm, + pendbase + byte_offset, + &pendmask, 1); if (ret) { kfree(intids); return ret; @@ -628,7 +629,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) return false; /* Each 1st level entry is represented by a 64-bit value. */ - if (kvm_read_guest(its->dev->kvm, + if (kvm_read_guest_lock(its->dev->kvm, BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), &indirect_ptr, sizeof(indirect_ptr))) return false; @@ -1152,8 +1153,8 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { - int ret = kvm_read_guest(kvm, cbaser + its->creadr, - cmd_buf, ITS_CMD_SIZE); + int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); /* * If kvm_read_guest() fails, this could be due to the guest * programming a bogus value in CBASER or something else going From 0c8b8d37c85805ebc47a3e6a5f799bad4de8328e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jan 2018 17:10:12 +1100 Subject: [PATCH 13/88] powerpc: Don't preempt_disable() in show_cpuinfo() commit 349524bc0da698ec77f2057cf4a4948eb6349265 upstream. This causes warnings from cpufreq mutex code. This is also rather unnecessary and ineffective. If we really want to prevent concurrent unplug, we could take the unplug read lock but I don't see this being critical. Fixes: cd77b5ce208c ("powerpc/powernv/cpufreq: Fix the frequency read by /proc/cpuinfo") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Acked-by: Michal Suchanek Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup-common.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f516ac508ae3..bf0f712ac0e0 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -228,14 +228,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. */ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -340,9 +332,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif - - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); From 20a30619b3315c1f11565cfbc0dbe2ea92a8a003 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 14 Dec 2016 15:04:10 -0800 Subject: [PATCH 14/88] signals: avoid unnecessary taking of sighand->siglock commit c7be96af89d4b53211862d8599b2430e8900ed92 upstream. When running certain database workload on a high-end system with many CPUs, it was found that spinlock contention in the sigprocmask syscalls became a significant portion of the overall CPU cycles as shown below. 9.30% 9.30% 905387 dataserver /proc/kcore 0x7fff8163f4d2 [k] _raw_spin_lock_irq | ---_raw_spin_lock_irq | |--99.34%-- __set_current_blocked | sigprocmask | sys_rt_sigprocmask | system_call_fastpath | | | |--50.63%-- __swapcontext | | | | | |--99.91%-- upsleepgeneric | | | |--49.36%-- __setcontext | | ktskRun Looking further into the swapcontext function in glibc, it was found that the function always call sigprocmask() without checking if there are changes in the signal mask. A check was added to the __set_current_blocked() function to avoid taking the sighand->siglock spinlock if there is no change in the signal mask. This will prevent unneeded spinlock contention when many threads are trying to call sigprocmask(). With this patch applied, the spinlock contention in sigprocmask() was gone. Link: http://lkml.kernel.org/r/1474979209-11867-1-git-send-email-Waiman.Long@hpe.com Signed-off-by: Waiman Long Acked-by: Oleg Nesterov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Stas Sergeev Cc: Scott J Norton Cc: Douglas Hatch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/signal.h | 17 +++++++++++++++++ kernel/signal.c | 7 +++++++ 2 files changed, 24 insertions(+) diff --git a/include/linux/signal.h b/include/linux/signal.h index b63f63eaa39c..5308304993be 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -97,6 +97,23 @@ static inline int sigisemptyset(sigset_t *set) } } +static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) +{ + switch (_NSIG_WORDS) { + case 4: + return (set1->sig[3] == set2->sig[3]) && + (set1->sig[2] == set2->sig[2]) && + (set1->sig[1] == set2->sig[1]) && + (set1->sig[0] == set2->sig[0]); + case 2: + return (set1->sig[1] == set2->sig[1]) && + (set1->sig[0] == set2->sig[0]); + case 1: + return set1->sig[0] == set2->sig[0]; + } + return 0; +} + #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS diff --git a/kernel/signal.c b/kernel/signal.c index 7ebe236a5364..17428fec19b0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2495,6 +2495,13 @@ void __set_current_blocked(const sigset_t *newset) { struct task_struct *tsk = current; + /* + * In case the signal mask hasn't changed, there is nothing we need + * to do. The current->blocked shouldn't be modified by other task. + */ + if (sigequalsets(&tsk->blocked, newset)) + return; + spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, newset); spin_unlock_irq(&tsk->sighand->siglock); From 1af681da78b748eba17ad244d08e8bbe1980bf5c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 9 May 2018 14:36:09 -0400 Subject: [PATCH 15/88] tracing/x86/xen: Remove zero data size trace events trace_xen_mmu_flush_tlb{_all} commit 45dd9b0666a162f8e4be76096716670cf1741f0e upstream. Doing an audit of trace events, I discovered two trace events in the xen subsystem that use a hack to create zero data size trace events. This is not what trace events are for. Trace events add memory footprint overhead, and if all you need to do is see if a function is hit or not, simply make that function noinline and use function tracer filtering. Worse yet, the hack used was: __array(char, x, 0) Which creates a static string of zero in length. There's assumptions about such constructs in ftrace that this is a dynamic string that is nul terminated. This is not the case with these tracepoints and can cause problems in various parts of ftrace. Nuke the trace events! Link: http://lkml.kernel.org/r/20180509144605.5a220327@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 95a7d76897c1e ("xen/mmu: Use Xen specific TLB flush instead of the generic one.") Reviewed-by: Juergen Gross Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/mmu.c | 4 ---- include/trace/events/xen.h | 16 ---------------- 2 files changed, 20 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 418f1b8576cf..c92f75f7ae33 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1317,8 +1317,6 @@ void xen_flush_tlb_all(void) struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); @@ -1336,8 +1334,6 @@ static void xen_flush_tlb(void) struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index bce990f5a35d..d6be935caa50 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -377,22 +377,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); -TRACE_EVENT(xen_mmu_flush_tlb_all, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - -TRACE_EVENT(xen_mmu_flush_tlb, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - TRACE_EVENT(xen_mmu_flush_tlb_single, TP_PROTO(unsigned long addr), TP_ARGS(addr), From f32bb2aad27e5dac434a8fa1e12b1d6c25fe49f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Apr 2018 09:30:27 +0200 Subject: [PATCH 16/88] netfilter: nf_tables: can't fail after linking rule into active rule list commit 569ccae68b38654f04b6842b034aa33857f605fe upstream. rules in nftables a free'd using kfree, but protected by rcu, i.e. we must wait for a grace period to elapse. Normal removal patch does this, but nf_tables_newrule() doesn't obey this rule during error handling. It calls nft_trans_rule_add() *after* linking rule, and, if that fails to allocate memory, it unlinks the rule and then kfree() it -- this is unsafe. Switch order -- first add rule to transaction list, THEN link it to public list. Note: nft_trans_rule_add() uses GFP_KERNEL; it will not fail so this is not a problem in practice (spotted only during code review). Fixes: 0628b123c96d12 ("netfilter: nfnetlink: add batch support and use it from nf_tables") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 59 +++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fa3ef25441e5..762f31fb5b67 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2200,41 +2200,46 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_is_active_next(net, old_rule)) { - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); - if (trans == NULL) { - err = -ENOMEM; - goto err2; - } - nft_deactivate_next(net, old_rule); - chain->use--; - list_add_tail_rcu(&rule->list, &old_rule->list); - } else { + if (!nft_is_active_next(net, old_rule)) { err = -ENOENT; goto err2; } - } else if (nlh->nlmsg_flags & NLM_F_APPEND) - if (old_rule) - list_add_rcu(&rule->list, &old_rule->list); - else - list_add_tail_rcu(&rule->list, &chain->rules); - else { - if (old_rule) - list_add_tail_rcu(&rule->list, &old_rule->list); - else - list_add_rcu(&rule->list, &chain->rules); - } + trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, + old_rule); + if (trans == NULL) { + err = -ENOMEM; + goto err2; + } + nft_deactivate_next(net, old_rule); + chain->use--; - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; - goto err3; + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + list_add_tail_rcu(&rule->list, &old_rule->list); + } else { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_APPEND) { + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + } else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } } chain->use++; return 0; -err3: - list_del_rcu(&rule->list); err2: nf_tables_rule_destroy(&ctx, rule); err1: From 895c53e10b91432d0e9a9662448f0c7fc40f79e1 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sat, 28 Apr 2018 16:56:06 +0300 Subject: [PATCH 17/88] i2c: designware: fix poll-after-enable regression commit 06cb616b1bca7080824acfedb3d4c898e7a64836 upstream. Not all revisions of DW I2C controller implement the enable status register. On platforms where that's the case (e.g. BG2CD and SPEAr ARM SoCs), waiting for enable will time out as reading the unimplemented register yields zero. It was observed that reading the IC_ENABLE_STATUS register once suffices to avoid getting it stuck on Bay Trail hardware, so replace polling with one dummy read of the register. Fixes: fba4adbbf670 ("i2c: designware: must wait for enable") Signed-off-by: Alexander Monakov Tested-by: Ben Gardner Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-designware-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 340e037b3224..884c1ec61ac9 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -507,7 +507,10 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) i2c_dw_disable_int(dev); /* Enable the adapter */ - __i2c_dw_enable_and_wait(dev, true); + __i2c_dw_enable(dev, true); + + /* Dummy read to avoid the register getting stuck on Bay Trail */ + dw_readl(dev, DW_IC_ENABLE_STATUS); /* Clear and enable interrupts */ dw_readl(dev, DW_IC_CLR_INTR); From 96c83fb2de4d45a9da51668e4a2f9aa0ac2c74ba Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 May 2018 01:59:47 +1000 Subject: [PATCH 18/88] powerpc/powernv: Fix NVRAM sleep in invalid context when crashing commit c1d2a31397ec51f0370f6bd17b19b39152c263cb upstream. Similarly to opal_event_shutdown, opal_nvram_write can be called in the crash path with irqs disabled. Special case the delay to avoid sleeping in invalid context. Fixes: 3b8070335f75 ("powerpc/powernv: Fix OPAL NVRAM driver OPAL_BUSY loops") Cc: stable@vger.kernel.org # v3.2 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-nvram.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 1bceb95f422d..5584247f5029 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) return count; } +/* + * This can be called in the panic path with interrupts off, so use + * mdelay in that case. + */ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) { s64 rc = OPAL_BUSY; @@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); if (rc == OPAL_BUSY_EVENT) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); } else if (rc == OPAL_BUSY) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); } } From 270693b978f345c34509676c0729d72c19b6d87e Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 May 2018 16:09:13 -0700 Subject: [PATCH 19/88] mm: don't allow deferred pages with NEED_PER_CPU_KM commit ab1e8d8960b68f54af42b6484b5950bd13a4054b upstream. It is unsafe to do virtual to physical translations before mm_init() is called if struct page is needed in order to determine the memory section number (see SECTION_IN_PAGE_FLAGS). This is because only in mm_init() we initialize struct pages for all the allocated memory when deferred struct pages are used. My recent fix in commit c9e97a1997 ("mm: initialize pages on demand during boot") exposed this problem, because it greatly reduced number of pages that are initialized before mm_init(), but the problem existed even before my fix, as Fengguang Wu found. Below is a more detailed explanation of the problem. We initialize struct pages in four places: 1. Early in boot a small set of struct pages is initialized to fill the first section, and lower zones. 2. During mm_init() we initialize "struct pages" for all the memory that is allocated, i.e reserved in memblock. 3. Using on-demand logic when pages are allocated after mm_init call (when memblock is finished) 4. After smp_init() when the rest free deferred pages are initialized. The problem occurs if we try to do va to phys translation of a memory between steps 1 and 2. Because we have not yet initialized struct pages for all the reserved pages, it is inherently unsafe to do va to phys if the translation itself requires access of "struct page" as in case of this combination: CONFIG_SPARSE && !CONFIG_SPARSE_VMEMMAP The following path exposes the problem: start_kernel() trap_init() setup_cpu_entry_areas() setup_cpu_entry_area(cpu) get_cpu_gdt_paddr(cpu) per_cpu_ptr_to_phys(addr) pcpu_addr_to_page(addr) virt_to_page(addr) pfn_to_page(__pa(addr) >> PAGE_SHIFT) We disable this path by not allowing NEED_PER_CPU_KM with deferred struct pages feature. The problems are discussed in these threads: http://lkml.kernel.org/r/20180418135300.inazvpxjxowogyge@wfg-t540p.sh.intel.com http://lkml.kernel.org/r/20180419013128.iurzouiqxvcnpbvz@wfg-t540p.sh.intel.com http://lkml.kernel.org/r/20180426202619.2768-1-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180515175124.1770-1-pasha.tatashin@oracle.com Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Steven Sistare Cc: Daniel Jordan Cc: Mel Gorman Cc: Fengguang Wu Cc: Dennis Zhou Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig b/mm/Kconfig index 86e3e0e74d20..ea074a9d4958 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -666,6 +666,7 @@ config DEFERRED_STRUCT_PAGE_INIT depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT depends on NO_BOOTMEM && MEMORY_HOTPLUG depends on !FLATMEM + depends on !NEED_PER_CPU_KM help Ordinarily all struct pages are initialised during early boot in a single thread. On very large machines this can take a considerable From 252bbeb9688a3c6ede9ea997e93ecb5d05e60209 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 2 May 2018 08:48:43 +0200 Subject: [PATCH 20/88] s390/qdio: fix access to uninitialized qdio_q fields commit e521813468f786271a87e78e8644243bead48fad upstream. Ever since CQ/QAOB support was added, calling qdio_free() straight after qdio_alloc() results in qdio_release_memory() accessing uninitialized memory (ie. q->u.out.use_cq and q->u.out.aobs). Followed by a kmem_cache_free() on the random AOB addresses. For older kernels that don't have 6e30c549f6ca, the same applies if qdio_establish() fails in the DEV_STATE_ONLINE check. While initializing q->u.out.use_cq would be enough to fix this particular bug, the more future-proof change is to just zero-alloc the whole struct. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Cc: #v3.2+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/qdio_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 48b3866a9ded..3da5f3f3cf8b 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -140,7 +140,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) int i; for (i = 0; i < nr_queues; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); + q = kmem_cache_zalloc(qdio_q_cache, GFP_KERNEL); if (!q) return -ENOMEM; From a03e14f09b40a1a10ce464d906814bb85ecd71ab Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 3 May 2018 15:56:15 +0200 Subject: [PATCH 21/88] s390/cpum_sf: ensure sample frequency of perf event attributes is non-zero commit 4bbaf2584b86b0772413edeac22ff448f36351b1 upstream. Correct a trinity finding for the perf_event_open() system call with a perf event attribute structure that uses a frequency but has the sampling frequency set to zero. This causes a FP divide exception during the sample rate initialization for the hardware sampling facility. Fixes: 8c069ff4bd606 ("s390/perf: add support for the CPU-Measurement Sampling Facility") Cc: stable@vger.kernel.org # 3.14+ Reviewed-by: Heiko Carstens Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/perf_cpum_sf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index fcc634c1479a..96e4fcad57bf 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -739,6 +739,10 @@ static int __hw_perf_event_init(struct perf_event *event) */ rate = 0; if (attr->freq) { + if (!attr->sample_freq) { + err = -EINVAL; + goto out; + } rate = freq_to_sample_rate(&si, attr->sample_freq); rate = hw_limit_rate(&si, rate); attr->freq = 0; From c79b01b8d4cb7c37c5da2299152c2ca9f22acc76 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 2 May 2018 08:28:34 +0200 Subject: [PATCH 22/88] s390/qdio: don't release memory in qdio_setup_irq() commit 2e68adcd2fb21b7188ba449f0fab3bee2910e500 upstream. Calling qdio_release_memory() on error is just plain wrong. It frees the main qdio_irq struct, when following code still uses it. Also, no other error path in qdio_establish() does this. So trust callers to clean up via qdio_free() if some step of the QDIO initialization fails. Fixes: 779e6e1c724d ("[S390] qdio: new qdio driver.") Cc: #v2.6.27+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/qdio_setup.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 3da5f3f3cf8b..35286907c636 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -456,7 +456,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data) { struct ciw *ciw; struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data; - int rc; memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib)); memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag)); @@ -493,16 +492,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data) ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); if (!ciw) { DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no); - rc = -EINVAL; - goto out_err; + return -EINVAL; } irq_ptr->equeue = *ciw; ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); if (!ciw) { DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no); - rc = -EINVAL; - goto out_err; + return -EINVAL; } irq_ptr->aqueue = *ciw; @@ -510,9 +507,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data) irq_ptr->orig_handler = init_data->cdev->handler; init_data->cdev->handler = qdio_int_handler; return 0; -out_err: - qdio_release_memory(irq_ptr); - return rc; } void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, From 633b4eb03bab15247214eae6a544dabf01688511 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Apr 2018 11:18:49 +0200 Subject: [PATCH 23/88] s390: remove indirect branch from do_softirq_own_stack commit 9f18fff63cfd6f559daa1eaae60640372c65f84b upstream. The inline assembly to call __do_softirq on the irq stack uses an indirect branch. This can be replaced with a normal relative branch. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/irq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 285d6561076d..7ff976737bb1 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -173,10 +173,9 @@ void do_softirq_own_stack(void) new -= STACK_FRAME_OVERHEAD; ((struct stack_frame *) new)->back_chain = old; asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" + " brasl 14,__do_softirq\n" " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) + : : "a" (new), "a" (old) : "0", "1", "2", "3", "4", "5", "14", "cc", "memory" ); } else { From a8330db3fb9b434471b861927e8e965835005929 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 9 May 2018 10:13:51 -0700 Subject: [PATCH 24/88] x86/pkeys: Override pkey when moving away from PROT_EXEC commit 0a0b152083cfc44ec1bb599b57b7aab41327f998 upstream. I got a bug report that the following code (roughly) was causing a SIGSEGV: mprotect(ptr, size, PROT_EXEC); mprotect(ptr, size, PROT_NONE); mprotect(ptr, size, PROT_READ); *ptr = 100; The problem is hit when the mprotect(PROT_EXEC) is implicitly assigned a protection key to the VMA, and made that key ACCESS_DENY|WRITE_DENY. The PROT_NONE mprotect() failed to remove the protection key, and the PROT_NONE-> PROT_READ left the PTE usable, but the pkey still in place and left the memory inaccessible. To fix this, we ensure that we always "override" the pkee at mprotect() if the VMA does not have execute-only permissions, but the VMA has the execute-only pkey. We had a check for PROT_READ/WRITE, but it did not work for PROT_NONE. This entirely removes the PROT_* checks, which ensures that PROT_NONE now works. Reported-by: Shakeel Butt Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Dave Hansen Cc: Linus Torvalds Cc: Michael Ellermen Cc: Peter Zijlstra Cc: Ram Pai Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Fixes: 62b5f7d013f ("mm/core, x86/mm/pkeys: Add execute-only protection keys support") Link: http://lkml.kernel.org/r/20180509171351.084C5A71@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pkeys.h | 12 +++++++++++- arch/x86/mm/pkeys.c | 21 +++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index b3b09b98896d..bcb5745f927d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H +#define ARCH_DEFAULT_PKEY 0 + #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, @@ -14,7 +16,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return 0; + return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); } @@ -55,6 +57,14 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) return false; if (pkey >= arch_max_pkey()) return false; + /* + * The exec-only pkey is set in the allocation map, but + * is not available to any of the user interfaces like + * mprotect_pkey(). + */ + if (pkey == mm->context.execute_only_pkey) + return false; + return mm_pkey_allocation_map(mm) & (1U << pkey); } diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index f88ce0e5efd9..0bbec041c003 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -95,26 +95,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey */ if (pkey != -1) return pkey; - /* - * Look for a protection-key-drive execute-only mapping - * which is now being given permissions that are not - * execute-only. Move it back to the default pkey. - */ - if (vma_is_pkey_exec_only(vma) && - (prot & (PROT_READ|PROT_WRITE))) { - return 0; - } + /* * The mapping is execute-only. Go try to get the * execute-only protection key. If we fail to do that, * fall through as if we do not have execute-only - * support. + * support in this mm. */ if (prot == PROT_EXEC) { pkey = execute_only_pkey(vma->vm_mm); if (pkey > 0) return pkey; + } else if (vma_is_pkey_exec_only(vma)) { + /* + * Protections are *not* PROT_EXEC, but the mapping + * is using the exec-only pkey. This mapping was + * PROT_EXEC and will no longer be. Move back to + * the default pkey. + */ + return ARCH_DEFAULT_PKEY; } + /* * This is a vanilla, non-pkey mprotect (or we failed to * setup execute-only), inherit the pkey from the VMA we From 7925d9da8d189025b190ad43ff476cba240144f2 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 9 May 2018 10:13:58 -0700 Subject: [PATCH 25/88] x86/pkeys: Do not special case protection key 0 commit 2fa9d1cfaf0e02f8abef0757002bff12dfcfa4e6 upstream. mm_pkey_is_allocated() treats pkey 0 as unallocated. That is inconsistent with the manpages, and also inconsistent with mm->context.pkey_allocation_map. Stop special casing it and only disallow values that are actually bad (< 0). The end-user visible effect of this is that you can now use mprotect_pkey() to set pkey=0. This is a bit nicer than what Ram proposed[1] because it is simpler and removes special-casing for pkey 0. On the other hand, it does allow applications to pkey_free() pkey-0, but that's just a silly thing to do, so we are not going to protect against it. The scenario that could happen is similar to what happens if you free any other pkey that is in use: it might get reallocated later and used to protect some other data. The most likely scenario is that pkey-0 comes back from pkey_alloc(), an access-disable or write-disable bit is set in PKRU for it, and the next stack access will SIGSEGV. It's not horribly different from if you mprotect()'d your stack or heap to be unreadable or unwritable, which is generally very foolish, but also not explicitly prevented by the kernel. 1. http://lkml.kernel.org/r/1522112702-27853-1-git-send-email-linuxram@us.ibm.com Signed-off-by: Dave Hansen Cc: Andrew Morton p Cc: Dave Hansen Cc: Linus Torvalds Cc: Michael Ellermen Cc: Peter Zijlstra Cc: Ram Pai Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Fixes: 58ab9a088dda ("x86/pkeys: Check against max pkey to avoid overflows") Link: http://lkml.kernel.org/r/20180509171358.47FD785E@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/pkeys.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5a295bb97103..733650874b30 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -113,7 +113,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { - /* pkey 0 is the default and always allocated */ + /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index bcb5745f927d..c50d6dcf4a22 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -50,10 +50,10 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { /* * "Allocated" pkeys are those that have been returned - * from pkey_alloc(). pkey 0 is special, and never - * returned from pkey_alloc(). + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. */ - if (pkey <= 0) + if (pkey < 0) return false; if (pkey >= arch_max_pkey()) return false; From dc7de9b203e844ecb143b32cc86a54bb528b8c5d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 4 May 2018 07:59:58 +0200 Subject: [PATCH 26/88] efi: Avoid potential crashes, fix the 'struct efi_pci_io_protocol_32' definition for mixed mode commit 0b3225ab9407f557a8e20f23f37aa7236c10a9b1 upstream. Mixed mode allows a kernel built for x86_64 to interact with 32-bit EFI firmware, but requires us to define all struct definitions carefully when it comes to pointer sizes. 'struct efi_pci_io_protocol_32' currently uses a 'void *' for the 'romimage' field, which will be interpreted as a 64-bit field on such kernels, potentially resulting in bogus memory references and subsequent crashes. Tested-by: Hans de Goede Signed-off-by: Ard Biesheuvel Cc: Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180504060003.19618-13-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/eboot.c | 6 ++++-- include/linux/efi.h | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index cc69e37548db..c0ad1bb27fa2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -330,7 +330,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, + pci->romsize); return status; free_struct: @@ -436,7 +437,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, + pci->romsize); return status; free_struct: diff --git a/include/linux/efi.h b/include/linux/efi.h index cba7177cbec7..80b1b8faf503 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -380,8 +380,8 @@ typedef struct { u32 attributes; u32 get_bar_attributes; u32 set_bar_attributes; - uint64_t romsize; - void *romimage; + u64 romsize; + u32 romimage; } efi_pci_io_protocol_32; typedef struct { @@ -400,8 +400,8 @@ typedef struct { u64 attributes; u64 get_bar_attributes; u64 set_bar_attributes; - uint64_t romsize; - void *romimage; + u64 romsize; + u64 romimage; } efi_pci_io_protocol_64; typedef struct { From 10b408d6324b60fe2a949c2bac63fe9d5bd00851 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 13 May 2018 05:04:16 +0100 Subject: [PATCH 27/88] ARM: 8771/1: kprobes: Prohibit kprobes on do_undefinstr commit eb0146daefdde65665b7f076fbff7b49dade95b9 upstream. Prohibit kprobes on do_undefinstr because kprobes on arm is implemented by undefined instruction. This means if we probe do_undefinstr(), it can cause infinit recursive exception. Fixes: 24ba613c9d6c ("ARM kprobes: core code") Signed-off-by: Masami Hiramatsu Cc: stable@vger.kernel.org Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/traps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1b304897aa12..aa316a7562b1 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -415,7 +416,8 @@ void unregister_undef_hook(struct undef_hook *hook) raw_spin_unlock_irqrestore(&undef_lock, flags); } -static int call_undef_hook(struct pt_regs *regs, unsigned int instr) +static nokprobe_inline +int call_undef_hook(struct pt_regs *regs, unsigned int instr) { struct undef_hook *hook; unsigned long flags; @@ -488,6 +490,7 @@ die_sig: arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); } +NOKPROBE_SYMBOL(do_undefinstr) /* * Handle FIQ similarly to NMI on x86 systems. From f58b66165d556abcf7eb0375bcc85e9fec2de614 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 15 May 2018 19:52:50 +0000 Subject: [PATCH 28/88] tick/broadcast: Use for_each_cpu() specially on UP kernels commit 5596fe34495cf0f645f417eb928ef224df3e3cb4 upstream. for_each_cpu() unintuitively reports CPU0 as set independent of the actual cpumask content on UP kernels. This causes an unexpected PIT interrupt storm on a UP kernel running in an SMP virtual machine on Hyper-V, and as a result, the virtual machine can suffer from a strange random delay of 1~20 minutes during boot-up, and sometimes it can hang forever. Protect if by checking whether the cpumask is empty before entering the for_each_cpu() loop. [ tglx: Use !IS_ENABLED(CONFIG_SMP) instead of #ifdeffery ] Signed-off-by: Dexuan Cui Signed-off-by: Thomas Gleixner Cc: Josh Poulson Cc: "Michael Kelley (EOSG)" Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: stable@vger.kernel.org Cc: Rakib Mullick Cc: Jork Loeser Cc: Greg Kroah-Hartman Cc: Andrew Morton Cc: KY Srinivasan Cc: Linus Torvalds Cc: Alexey Dobriyan Cc: Dmitry Vyukov Link: https://lkml.kernel.org/r/KL1P15301MB000678289FE55BA365B3279ABF990@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM Link: https://lkml.kernel.org/r/KL1P15301MB0006FA63BC22BEB64902EAA0BF930@KL1P15301MB0006.APCP153.PROD.OUTLOOK.COM Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d2a20e83ebae..22d7454b387b 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -610,6 +610,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { + /* + * Required for !SMP because for_each_cpu() reports + * unconditionally CPU0 as set on UP kernels. + */ + if (!IS_ENABLED(CONFIG_SMP) && + cpumask_empty(tick_broadcast_oneshot_mask)) + break; + td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event.tv64 <= now.tv64) { cpumask_set_cpu(cpu, tmpmask); From 70b4b1451086218a32122cf9fc7f92ccfb9ae4dc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 13 May 2018 05:03:54 +0100 Subject: [PATCH 29/88] ARM: 8769/1: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed commit 69af7e23a6870df2ea6fa79ca16493d59b3eebeb upstream. Since get_kprobe_ctlblk() uses smp_processor_id() to access per-cpu variable, it hits smp_processor_id sanity check as below. [ 7.006928] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 [ 7.007859] caller is debug_smp_processor_id+0x20/0x24 [ 7.008438] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1-00192-g4eb17253e4b5 #1 [ 7.008890] Hardware name: Generic DT based system [ 7.009917] [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [ 7.010473] [] (show_stack) from [] (dump_stack+0x84/0x98) [ 7.010990] [] (dump_stack) from [] (check_preemption_disabled+0x138/0x13c) [ 7.011592] [] (check_preemption_disabled) from [] (debug_smp_processor_id+0x20/0x24) [ 7.012214] [] (debug_smp_processor_id) from [] (optimized_callback+0x2c/0xe4) [ 7.013077] [] (optimized_callback) from [] (0xbf0021b0) To fix this issue, call get_kprobe_ctlblk() right after irq-disabled since that disables preemption. Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32") Signed-off-by: Masami Hiramatsu Cc: stable@vger.kernel.org Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/opt-arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index bcdecc25461b..ddc5a82eb10d 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { unsigned long flags; struct kprobe *p = &op->kp; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; /* Save skipped registers */ regs->ARM_pc = (unsigned long)op->kp.addr; regs->ARM_ORIG_r0 = ~0UL; local_irq_save(flags); + kcb = get_kprobe_ctlblk(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); From b1690451d93fcd8af8edfa3b7fc6b966d330b0a8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 13 May 2018 05:04:10 +0100 Subject: [PATCH 30/88] ARM: 8770/1: kprobes: Prohibit probing on optimized_callback commit 70948c05fdde0aac32f9667856a88725c192fa40 upstream. Prohibit probing on optimized_callback() because it is called from kprobes itself. If we put a kprobes on it, that will cause a recursive call loop. Mark it NOKPROBE_SYMBOL. Fixes: 0dc016dbd820 ("ARM: kprobes: enable OPTPROBES for ARM 32") Signed-off-by: Masami Hiramatsu Cc: stable@vger.kernel.org Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/opt-arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index ddc5a82eb10d..b2aa9b32bff2 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -192,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) { From 21cc684a31ef5f9290aaef48b1c9146b019d987d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 13 May 2018 05:04:29 +0100 Subject: [PATCH 31/88] ARM: 8772/1: kprobes: Prohibit kprobes on get_user functions commit 0d73c3f8e7f6ee2aab1bb350f60c180f5ae21a2c upstream. Since do_undefinstr() uses get_user to get the undefined instruction, it can be called before kprobes processes recursive check. This can cause an infinit recursive exception. Prohibit probing on get_user functions. Fixes: 24ba613c9d6c ("ARM kprobes: core code") Signed-off-by: Masami Hiramatsu Cc: stable@vger.kernel.org Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/assembler.h | 10 ++++++++++ arch/arm/lib/getuser.S | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 12f99fd2e3b2..3aed4492c9a7 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -534,4 +534,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm +#ifdef CONFIG_KPROBES +#define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist", "aw" ; \ + .balign 4 ; \ + .long entry; \ + .popsection +#else +#define _ASM_NOKPROBE(entry) +#endif + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index df73914e81c8..746e7801dcdf 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -38,6 +38,7 @@ ENTRY(__get_user_1) mov r0, #0 ret lr ENDPROC(__get_user_1) +_ASM_NOKPROBE(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad @@ -58,6 +59,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_2) +_ASM_NOKPROBE(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad @@ -65,6 +67,7 @@ ENTRY(__get_user_4) mov r0, #0 ret lr ENDPROC(__get_user_4) +_ASM_NOKPROBE(__get_user_4) ENTRY(__get_user_8) check_uaccess r0, 8, r1, r2, __get_user_bad8 @@ -78,6 +81,7 @@ ENTRY(__get_user_8) mov r0, #0 ret lr ENDPROC(__get_user_8) +_ASM_NOKPROBE(__get_user_8) #ifdef __ARMEB__ ENTRY(__get_user_32t_8) @@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8) mov r0, #0 ret lr ENDPROC(__get_user_32t_8) +_ASM_NOKPROBE(__get_user_32t_8) ENTRY(__get_user_64t_1) check_uaccess r0, 1, r1, r2, __get_user_bad8 @@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1) mov r0, #0 ret lr ENDPROC(__get_user_64t_1) +_ASM_NOKPROBE(__get_user_64t_1) ENTRY(__get_user_64t_2) check_uaccess r0, 2, r1, r2, __get_user_bad8 @@ -114,6 +120,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_64t_2) +_ASM_NOKPROBE(__get_user_64t_2) ENTRY(__get_user_64t_4) check_uaccess r0, 4, r1, r2, __get_user_bad8 @@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4) mov r0, #0 ret lr ENDPROC(__get_user_64t_4) +_ASM_NOKPROBE(__get_user_64t_4) #endif __get_user_bad8: @@ -131,6 +139,8 @@ __get_user_bad: ret lr ENDPROC(__get_user_bad) ENDPROC(__get_user_bad8) +_ASM_NOKPROBE(__get_user_bad) +_ASM_NOKPROBE(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad From 92291247b6069bce1c9c695b0f3496be4303b9fc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 11 May 2018 16:42:42 +0100 Subject: [PATCH 32/88] Btrfs: fix xattr loss after power failure commit 9a8fca62aacc1599fea8e813d01e1955513e4fad upstream. If a file has xattrs, we fsync it, to ensure we clear the flags BTRFS_INODE_NEEDS_FULL_SYNC and BTRFS_INODE_COPY_EVERYTHING from its inode, the current transaction commits and then we fsync it (without either of those bits being set in its inode), we end up not logging all its xattrs. This results in deleting all xattrs when replying the log after a power failure. Trivial reproducer $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foobar $ setfattr -n user.xa -v qwerty /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar $ sync $ xfs_io -c "pwrite -S 0xab 0 64K" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar $ mount /dev/sdb /mnt $ getfattr --absolute-names --dump /mnt/foobar $ So fix this by making sure all xattrs are logged if we log a file's inode item and neither the flags BTRFS_INODE_NEEDS_FULL_SYNC nor BTRFS_INODE_COPY_EVERYTHING were set in the inode. Fixes: 36283bf777d9 ("Btrfs: fix fsync xattr loss in the fast fsync path") Cc: # 4.2+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 52401732cddc..c65350e5119c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4614,6 +4614,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; bool need_log_inode_item = true; + bool xattrs_logged = false; path = btrfs_alloc_path(); if (!path) @@ -4918,6 +4919,7 @@ next_key: err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); if (err) goto out_unlock; + xattrs_logged = true; if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); @@ -4930,6 +4932,11 @@ log_extents: btrfs_release_path(dst_path); if (need_log_inode_item) { err = log_inode_item(trans, log, dst_path, inode); + if (!err && !xattrs_logged) { + err = btrfs_log_all_xattrs(trans, root, inode, path, + dst_path); + btrfs_release_path(path); + } if (err) goto out_unlock; } From b2d748b3a8d4e936a7e6e5fc9f04e2f9696efcc5 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 17 May 2018 15:16:51 +0800 Subject: [PATCH 33/88] btrfs: fix crash when trying to resume balance without the resume flag commit 02ee654d3a04563c67bfe658a05384548b9bb105 upstream. We set the BTRFS_BALANCE_RESUME flag in the btrfs_recover_balance() only, which isn't called during the remount. So when resuming from the paused balance we hit the bug: kernel: kernel BUG at fs/btrfs/volumes.c:3890! :: kernel: balance_kthread+0x51/0x60 [btrfs] kernel: kthread+0x111/0x130 :: kernel: RIP: btrfs_balance+0x12e1/0x1570 [btrfs] RSP: ffffba7d0090bde8 Reproducer: On a mounted filesystem: btrfs balance start --full-balance /btrfs btrfs balance pause /btrfs mount -o remount,ro /dev/sdb /btrfs mount -o remount,rw /dev/sdb /btrfs To fix this set the BTRFS_BALANCE_RESUME flag in btrfs_resume_balance_async(). CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4730ba2cc049..c2495cde26f6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3966,6 +3966,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) return 0; } + /* + * A ro->rw remount sequence should continue with the paused balance + * regardless of who pauses it, system or the user as of now, so set + * the resume flag. + */ + spin_lock(&fs_info->balance_lock); + fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME; + spin_unlock(&fs_info->balance_lock); + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); return PTR_ERR_OR_ZERO(tsk); } From 944e0fc51a89c9827b98813d65dc083274777c7f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 20 May 2018 20:51:10 +0100 Subject: [PATCH 34/88] x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen commit def9331a12977770cc6132d79f8e6565871e8e38 upstream When running as Xen pv guest X86_BUG_SYSRET_SS_ATTRS must not be set on AMD cpus. This bug/feature bit is kind of special as it will be used very early when switching threads. Setting the bit and clearing it a little bit later leaves a critical window where things can go wrong. This time window has enlarged a little bit by using setup_clear_cpu_cap() instead of the hypervisor's set_cpu_features callback. It seems this larger window now makes it rather easy to hit the problem. The proper solution is to never set the bit in case of Xen. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Thomas Gleixner Signed-off-by: Juergen Gross Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 5 +++-- arch/x86/xen/enlighten.c | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c375bc672f82..747f8a20b629 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -824,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - /* AMD CPUs don't reset SS attributes on SYSRET */ - set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ + if (!cpu_has(c, X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2bea87cc0ff2..081437b5f381 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1977,10 +1977,8 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); static void xen_set_cpu_features(struct cpuinfo_x86 *c) { - if (xen_pv_domain()) { - clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + if (xen_pv_domain()) set_cpu_cap(c, X86_FEATURE_XENPV); - } } static void xen_pin_vcpu(int cpu) From 298d5db170f7d8430498417fa96e7472b620dcea Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 16 May 2018 01:37:36 +0800 Subject: [PATCH 35/88] btrfs: fix reading stale metadata blocks after degraded raid1 mounts commit 02a3307aa9c20b4f6626255b028f07f6cfa16feb upstream. If a btree block, aka. extent buffer, is not available in the extent buffer cache, it'll be read out from the disk instead, i.e. btrfs_search_slot() read_block_for_search() # hold parent and its lock, go to read child btrfs_release_path() read_tree_block() # read child Unfortunately, the parent lock got released before reading child, so commit 5bdd3536cbbe ("Btrfs: Fix block generation verification race") had used 0 as parent transid to read the child block. It forces read_tree_block() not to check if parent transid is different with the generation id of the child that it reads out from disk. A simple PoC is included in btrfs/124, 0. A two-disk raid1 btrfs, 1. Right after mkfs.btrfs, block A is allocated to be device tree's root. 2. Mount this filesystem and put it in use, after a while, device tree's root got COW but block A hasn't been allocated/overwritten yet. 3. Umount it and reload the btrfs module to remove both disks from the global @fs_devices list. 4. mount -odegraded dev1 and write some data, so now block A is allocated to be a leaf in checksum tree. Note that only dev1 has the latest metadata of this filesystem. 5. Umount it and mount it again normally (with both disks), since raid1 can pick up one disk by the writer task's pid, if btrfs_search_slot() needs to read block A, dev2 which does NOT have the latest metadata might be read for block A, then we got a stale block A. 6. As parent transid is not checked, block A is marked as uptodate and put into the extent buffer cache, so the future search won't bother to read disk again, which means it'll make changes on this stale one and make it dirty and flush it onto disk. To avoid the problem, parent transid needs to be passed to read_tree_block(). In order to get a valid parent transid, we need to hold the parent's lock until finishing reading child. This patch needs to be slightly adapted for stable kernels, the &first_key parameter added to read_tree_block() is from 4.16+ (581c1760415c4). The fix is to replace 0 by 'gen'. Fixes: 5bdd3536cbbe ("Btrfs: Fix block generation verification race") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Reviewed-by: Qu Wenruo [ update changelog ] Signed-off-by: David Sterba Signed-off-by: Nikolay Borisov Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f6ba165d3f81..409b12392474 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2486,10 +2486,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, if (p->reada != READA_NONE) reada_for_search(root, p, level, slot, key->objectid); - btrfs_release_path(p); - ret = -EAGAIN; - tmp = read_tree_block(root, blocknr, 0); + tmp = read_tree_block(root, blocknr, gen); if (!IS_ERR(tmp)) { /* * If the read above didn't mark this buffer up to date, @@ -2503,6 +2501,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, } else { ret = PTR_ERR(tmp); } + + btrfs_release_path(p); return ret; } From 741c026d1a0c594f7ad509f44488ef29582fed74 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 1 May 2018 15:55:51 +0200 Subject: [PATCH 36/88] x86/nospec: Simplify alternative_msr_write() commit 1aa7a5735a41418d8e01fa7c9565eb2657e2ea3f upstream The macro is not type safe and I did look for why that "g" constraint for the asm doesn't work: it's because the asm is more fundamentally wrong. It does movl %[val], %%eax but "val" isn't a 32-bit value, so then gcc will pass it in a register, and generate code like movl %rsi, %eax and gas will complain about a nonsensical 'mov' instruction (it's moving a 64-bit register to a 32-bit one). Passing it through memory will just hide the real bug - gcc still thinks the memory location is 64-bit, but the "movl" will only load the first 32 bits and it all happens to work because x86 is little-endian. Convert it to a type safe inline function with a little trick which hands the feature into the ALTERNATIVE macro. Signed-off-by: Linus Torvalds Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..870acfc2fd34 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -241,15 +241,16 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" (val), + "d" (val >> 32), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { From 88659d5fd9bea7f6afb227c6d404de750b368b45 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:16 -0400 Subject: [PATCH 37/88] x86/bugs: Concentrate bug detection into a separate function commit 4a28bfe3267b68e22c663ac26185aa16c9b879ef upstream Combine the various logic which goes through all those x86_cpu_id matching structures in one function. Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 301bbd1f2373..357c589c7ce6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,21 +879,27 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; -static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (x86_match_cpu(cpu_no_meltdown)) - return false; + return; if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) - return false; + return; - return true; + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } /* @@ -942,12 +948,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (!x86_match_cpu(cpu_no_speculation)) { - if (cpu_vulnerable_to_meltdown(c)) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - } + cpu_set_bug_bits(c); fpu__init_system(c); From 3effee64a9993dc5587fb39f0da4455769e53d26 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:17 -0400 Subject: [PATCH 38/88] x86/bugs: Concentrate bug reporting into a separate function commit d1059518b4789cabe34bb4b714d07e6089c82ca1 upstream Those SysFS functions have a similar preamble, as such make common code to handle them. Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 48 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b8b0b6e78371..4d9c5fed56b2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -313,30 +313,48 @@ retpoline_auto: #undef pr_fmt #ifdef CONFIG_SYSFS + +ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) +{ + if (!boot_cpu_has_bug(bug)) + return sprintf(buf, "Not affected\n"); + + switch (bug) { + case X86_BUG_CPU_MELTDOWN: + if (boot_cpu_has(X86_FEATURE_KAISER)) + return sprintf(buf, "Mitigation: PTI\n"); + + break; + + case X86_BUG_SPECTRE_V1: + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + + default: + break; + } + + return sprintf(buf, "Vulnerable\n"); +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) - return sprintf(buf, "Not affected\n"); - if (boot_cpu_has(X86_FEATURE_KAISER)) - return sprintf(buf, "Mitigation: PTI\n"); - return sprintf(buf, "Vulnerable\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); } ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); - - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); } #endif From 0f5dd651397b264903e8becc511af6cf384c273e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:18 -0400 Subject: [PATCH 39/88] x86/bugs: Read SPEC_CTRL MSR during boot and re-use reserved bits commit 1b86883ccb8d5d9506529d42dbe1a5257cb30b18 upstream The 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to all the other bits as reserved. The Intel SDM glossary defines reserved as implementation specific - aka unknown. As such at bootup this must be taken it into account and proper masking for the bits in use applied. A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199511 [ tglx: Made x86_spec_ctrl_base __ro_after_init ] Suggested-by: Jon Masters Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 24 ++++++++++++++++++++---- arch/x86/kernel/cpu/bugs.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 870acfc2fd34..9ec3d4d448cd 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,6 +217,17 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * The Intel specification for the SPEC_CTRL MSR requires that we + * preserve any already set reserved bits at boot time (e.g. for + * future additions that this kernel is not currently aware of). + * We then set any additional mitigation bits that we want + * ourselves and always use this as the base for SPEC_CTRL. + * We also use this when handling guest entry/exit as below. + */ +extern void x86_spec_ctrl_set(u64); +extern u64 x86_spec_ctrl_get_default(void); + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; @@ -254,8 +265,9 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } /* @@ -266,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_get_default(); \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4d9c5fed56b2..6ff972aa7307 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,6 +27,12 @@ static void __init spectre_v2_select_mitigation(void); +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +static u64 __ro_after_init x86_spec_ctrl_base; + void __init check_bugs(void) { identify_boot_cpu(); @@ -36,6 +42,13 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); @@ -94,6 +107,21 @@ static const char *spectre_v2_strings[] = { static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +void x86_spec_ctrl_set(u64 val) +{ + if (val & ~SPEC_CTRL_IBRS) + WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); + else + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); + +u64 x86_spec_ctrl_get_default(void) +{ + return x86_spec_ctrl_base; +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); + #ifdef RETPOLINE static bool spectre_v2_bad_module; From cf21f58ae6f264e0a10d9736be97342627cf9837 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:19 -0400 Subject: [PATCH 40/88] x86/bugs, KVM: Support the combination of guest and host IBRS commit 5cf687548705412da47c9cec342fd952d71ed3d5 upstream A guest may modify the SPEC_CTRL MSR from the value used by the kernel. Since the kernel doesn't use IBRS, this means a value of zero is what is needed in the host. But the 336996-Speculative-Execution-Side-Channel-Mitigations.pdf refers to the other bits as reserved so the kernel should respect the boot time SPEC_CTRL value and use that. This allows to deal with future extensions to the SPEC_CTRL interface if any at all. Note: This uses wrmsrl() instead of native_wrmsl(). I does not make any difference as paravirt will over-write the callq *0xfff.. with the wrmsrl assembler code. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 10 ++++++++++ arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++++++++ arch/x86/kvm/svm.c | 6 ++---- arch/x86/kvm/vmx.c | 6 ++---- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9ec3d4d448cd..d1c2630922da 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -228,6 +228,16 @@ enum spectre_v2_mitigation { extern void x86_spec_ctrl_set(u64); extern u64 x86_spec_ctrl_get_default(void); +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ +extern void x86_spec_ctrl_set_guest(u64); +extern void x86_spec_ctrl_restore_host(u64); + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6ff972aa7307..f5cad2f9af44 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -122,6 +122,24 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) +{ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; + if (x86_spec_ctrl_base != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); + +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) +{ + if (!boot_cpu_has(X86_FEATURE_IBRS)) + return; + if (x86_spec_ctrl_base != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} +EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); + #ifdef RETPOLINE static bool spectre_v2_bad_module; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index aaa93b4b0380..eeb8cd3193ea 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4917,8 +4917,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5030,8 +5029,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(svm->spec_ctrl); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b978aeccda78..266db0b2e138 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8916,8 +8916,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; asm( @@ -9055,8 +9054,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); From 24e4dd97af40afa4d45e85a32d9c2cc81425a62e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:20 -0400 Subject: [PATCH 41/88] x86/bugs: Expose /sys/../spec_store_bypass commit c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf upstream Add the sysfs file for the new vulerability. It does not do much except show the words 'Vulnerable' for recent x86 cores. Intel cores prior to family 6 are known not to be vulnerable, and so are some Atoms and some Xeon Phi. It assumes that older Cyrix, Centaur, etc. cores are immune. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 5 ++++ arch/x86/kernel/cpu/common.c | 23 +++++++++++++++++++ drivers/base/cpu.c | 8 +++++++ include/linux/cpu.h | 2 ++ 6 files changed, 40 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index dfd56ec7a850..6d75a9c00e8a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -355,6 +355,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a2485311164b..a688adbc427c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -335,5 +335,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f5cad2f9af44..64e17a962bb6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -403,4 +403,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c { return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 357c589c7ce6..4f1050aedc26 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,10 +879,33 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_CENTAUR, 5, }, + { X86_VENDOR_INTEL, 5, }, + { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_ANY, 4, }, + {} +}; + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (!x86_match_cpu(cpu_no_spec_store_bypass)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + if (x86_match_cpu(cpu_no_speculation)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 56b6c8508a89..cbb1cc6bbdb4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -519,14 +519,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, + &dev_attr_spec_store_bypass.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2f475ad89a0d..917829b27350 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -50,6 +50,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From a80714172abca6413d2d6505be64723ae73a903b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 28 Apr 2018 22:34:17 +0200 Subject: [PATCH 42/88] x86/cpufeatures: Add X86_FEATURE_RDS commit 0cc5fa00b0a88dad140b4e5c2cead9951ad36822 upstream Add the CPU feature bit CPUID.7.0.EDX[31] which indicates whether the CPU supports Reduced Data Speculation. [ tglx: Split it out from a later patch ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a688adbc427c..0c05c6cf5a12 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -306,6 +306,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ /* * BUG word(s) From 6f70a553666dd8c4fa370eaaa41380eec593229c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:21 -0400 Subject: [PATCH 43/88] x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation commit 24f7fc83b9204d20f878c57cb77d261ae825e033 upstream Contemporary high performance processors use a common industry-wide optimization known as "Speculative Store Bypass" in which loads from addresses to which a recent store has occurred may (speculatively) see an older value. Intel refers to this feature as "Memory Disambiguation" which is part of their "Smart Memory Access" capability. Memory Disambiguation can expose a cache side-channel attack against such speculatively read values. An attacker can create exploit code that allows them to read memory outside of a sandbox environment (for example, malicious JavaScript in a web page), or to perform more complex attacks against code running within the same privilege level, e.g. via the stack. As a first step to mitigate against such attacks, provide two boot command line control knobs: nospec_store_bypass_disable spec_store_bypass_disable=[off,auto,on] By default affected x86 processors will power on with Speculative Store Bypass enabled. Hence the provided kernel parameters are written from the point of view of whether to enable a mitigation or not. The parameters are as follows: - auto - Kernel detects whether your CPU model contains an implementation of Speculative Store Bypass and picks the most appropriate mitigation. - on - disable Speculative Store Bypass - off - enable Speculative Store Bypass [ tglx: Reordered the checks so that the whole evaluation is not done when the CPU does not support RDS ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 33 +++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 6 ++ arch/x86/kernel/cpu/bugs.c | 103 +++++++++++++++++++++++++++ 4 files changed, 143 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5f9e51436a99..792ac917411a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2699,6 +2699,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. allow data leaks with this option, which is equivalent to spectre_v2=off. + nospec_store_bypass_disable + [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3973,6 +3976,36 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Not specifying this option is equivalent to spectre_v2=auto. + spec_store_bypass_disable= + [HW] Control Speculative Store Bypass (SSB) Disable mitigation + (Speculative Store Bypass vulnerability) + + Certain CPUs are vulnerable to an exploit against a + a common industry wide performance optimization known + as "Speculative Store Bypass" in which recent stores + to the same memory location may not be observed by + later loads during speculative execution. The idea + is that such stores are unlikely and that they can + be detected prior to instruction retirement at the + end of a particular speculation execution window. + + In vulnerable processors, the speculatively forwarded + store can be used in a cache side channel attack, for + example to read memory to which the attacker does not + directly have access (e.g. inside sandboxed code). + + This parameter controls whether the Speculative Store + Bypass optimization is used. + + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0c05c6cf5a12..013f3defc6fa 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,6 +204,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d1c2630922da..7b9eacfc03f3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -238,6 +238,12 @@ extern u64 x86_spec_ctrl_get_default(void); extern void x86_spec_ctrl_set_guest(u64); extern void x86_spec_ctrl_restore_host(u64); +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, +}; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 64e17a962bb6..75146d99b3e1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -26,6 +26,7 @@ #include static void __init spectre_v2_select_mitigation(void); +static void __init ssb_select_mitigation(void); /* * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any @@ -52,6 +53,12 @@ void __init check_bugs(void) /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability. + */ + ssb_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -356,6 +363,99 @@ retpoline_auto: } } +#undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, +}; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ +}; + +static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + return SPEC_STORE_BYPASS_CMD_NONE; + } else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + + if (!boot_cpu_has(X86_FEATURE_RDS)) + return mode; + + cmd = ssb_parse_cmdline(); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + if (mode != SPEC_STORE_BYPASS_NONE) + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + return mode; +} + +static void ssb_select_mitigation() +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + #undef pr_fmt #ifdef CONFIG_SYSFS @@ -382,6 +482,9 @@ ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); + case X86_BUG_SPEC_STORE_BYPASS: + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + default: break; } From 19e3a2bec95e966921689ae39117f9dbbaffd99b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:22 -0400 Subject: [PATCH 44/88] x86/bugs/intel: Set proper CPU features and setup RDS commit 772439717dbf703b39990be58d8d4e3e4ad0598a upstream Intel CPUs expose methods to: - Detect whether RDS capability is available via CPUID.7.0.EDX[31], - The SPEC_CTRL MSR(0x48), bit 2 set to enable RDS. - MSR_IA32_ARCH_CAPABILITIES, Bit(4) no need to enable RRS. With that in mind if spec_store_bypass_disable=[auto,on] is selected set at boot-time the SPEC_CTRL MSR to enable RDS if the platform requires it. Note that this does not fix the KVM case where the SPEC_CTRL is exposed to guests which can muck with it, see patch titled : KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS. And for the firmware (IBRS to be set), see patch titled: x86/spectre_v2: Read SPEC_CTRL MSR during boot and re-use reserved bits [ tglx: Distangled it from the intel implementation and kept the call order ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 10 ++++++---- arch/x86/kernel/cpu/cpu.h | 3 +++ arch/x86/kernel/cpu/intel.c | 1 + 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c768bc1550a1..87103e8f4785 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -40,6 +40,7 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -61,6 +62,11 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_RDS_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Reduced Data Speculation control + * required. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 75146d99b3e1..7dd16f4ec3c4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -116,7 +116,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { - if (val & ~SPEC_CTRL_IBRS) + if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); else wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); @@ -443,8 +443,28 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) break; } - if (mode != SPEC_STORE_BYPASS_NONE) + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode != SPEC_STORE_BYPASS_NONE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_RDS; + x86_spec_ctrl_set(SPEC_CTRL_RDS); + break; + case X86_VENDOR_AMD: + break; + } + } + return mode; } @@ -458,6 +478,12 @@ static void ssb_select_mitigation() #undef pr_fmt +void x86_spec_ctrl_setup_ap(void) +{ + if (boot_cpu_has(X86_FEATURE_IBRS)) + x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); +} + #ifdef CONFIG_SYSFS ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4f1050aedc26..ab6b3adaa3d7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -903,7 +903,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (!x86_match_cpu(cpu_no_spec_store_bypass)) + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + if (!x86_match_cpu(cpu_no_spec_store_bypass) && + !(ia32_cap & ARCH_CAP_RDS_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) @@ -915,9 +919,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (x86_match_cpu(cpu_no_meltdown)) return; - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) return; @@ -1339,6 +1340,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #endif mtrr_ap_init(); validate_apic_and_package_id(c); + x86_spec_ctrl_setup_ap(); } struct msr_range { diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2584265d4745..3b19d82f7932 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); + +extern void x86_spec_ctrl_setup_ap(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8fb1d6522f8e..f15aea653e8a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -154,6 +154,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_RDS); } /* From 99b13116965f16b2e608e7796cd59198eee5bf06 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:23 -0400 Subject: [PATCH 45/88] x86/bugs: Whitelist allowed SPEC_CTRL MSR values commit 1115a859f33276fe8afb31c60cf9d8e657872558 upstream Intel and AMD SPEC_CTRL (0x48) MSR semantics may differ in the future (or in fact use different MSRs for the same functionality). As such a run-time mechanism is required to whitelist the appropriate MSR values. [ tglx: Made the variable __ro_after_init ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7dd16f4ec3c4..b92c4699f2ea 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,6 +34,12 @@ static void __init ssb_select_mitigation(void); */ static u64 __ro_after_init x86_spec_ctrl_base; +/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; + void __init check_bugs(void) { identify_boot_cpu(); @@ -116,7 +122,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { - if (val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS)) + if (val & x86_spec_ctrl_mask) WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); else wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); @@ -458,6 +464,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_RDS; + x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; x86_spec_ctrl_set(SPEC_CTRL_RDS); break; case X86_VENDOR_AMD: @@ -481,7 +488,7 @@ static void ssb_select_mitigation() void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) - x86_spec_ctrl_set(x86_spec_ctrl_base & (SPEC_CTRL_IBRS | SPEC_CTRL_RDS)); + x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); } #ifdef CONFIG_SYSFS From f854434b37bbf8953900226acd6139081f60d3da Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 20 May 2018 20:52:05 +0100 Subject: [PATCH 46/88] x86/bugs/AMD: Add support to disable RDS on Fam[15,16,17]h if requested commit 764f3c21588a059cd783c6ba0734d4db2d72822d upstream AMD does not need the Speculative Store Bypass mitigation to be enabled. The parameters for this are already available and can be done via MSR C001_1020. Each family uses a different bit in that MSR for this. [ tglx: Expose the bit mask via a variable and move the actual MSR fiddling into the bugs code as that's the right thing to do and also required to prepare for dynamic enable/disable ] Suggested-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 4 ++++ arch/x86/kernel/cpu/amd.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 ++++ 5 files changed, 61 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 013f3defc6fa..87970694cac1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -205,6 +205,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7b9eacfc03f3..3a1541cf32de 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -244,6 +244,10 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_DISABLE, }; +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_rds_mask; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 747f8a20b629..7551d9ad243a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -542,6 +543,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_FAM10H_NODE_ID, value); nodes_per_socket = ((value >> 3) & 7) + 1; } + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable RDS. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_RDS); + setup_force_cpu_cap(X86_FEATURE_AMD_RDS); + x86_amd_ls_cfg_rds_mask = 1ULL << bit; + } + } } static void early_init_amd(struct cpuinfo_x86 *c) @@ -827,6 +848,11 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { + set_cpu_cap(c, X86_FEATURE_RDS); + set_cpu_cap(c, X86_FEATURE_AMD_RDS); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b92c4699f2ea..b3696cc48fd6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -40,6 +40,13 @@ static u64 __ro_after_init x86_spec_ctrl_base; */ static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; +/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). + */ +u64 __ro_after_init x86_amd_ls_cfg_base; +u64 __ro_after_init x86_amd_ls_cfg_rds_mask; + void __init check_bugs(void) { identify_boot_cpu(); @@ -51,7 +58,8 @@ void __init check_bugs(void) /* * Read the SPEC_CTRL MSR to account for reserved bits which may - * have unknown values. + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. */ if (boot_cpu_has(X86_FEATURE_IBRS)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); @@ -153,6 +161,14 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); +static void x86_amd_rds_enable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + + if (boot_cpu_has(X86_FEATURE_AMD_RDS)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -442,6 +458,11 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: + /* + * AMD platforms by default don't need SSB mitigation. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; break; @@ -468,6 +489,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) x86_spec_ctrl_set(SPEC_CTRL_RDS); break; case X86_VENDOR_AMD: + x86_amd_rds_enable(); break; } } @@ -489,6 +511,9 @@ void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_rds_enable(); } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ab6b3adaa3d7..beb1da89ea7d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -895,6 +895,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { { X86_VENDOR_CENTAUR, 5, }, { X86_VENDOR_INTEL, 5, }, { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, { X86_VENDOR_ANY, 4, }, {} }; From 99318eca2c7ab3250b9614043b9ac6077ff2cb46 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:25 -0400 Subject: [PATCH 47/88] x86/KVM/VMX: Expose SPEC_CTRL Bit(2) to the guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit da39556f66f5cfe8f9c989206974f1cb16ca5d7c upstream Expose the CPUID.7.EDX[31] bit to the guest, and also guard against various combinations of SPEC_CTRL MSR values. The handling of the MSR (to take into account the host value of SPEC_CTRL Bit(2)) is taken care of in patch: KVM/SVM/VMX/x86/spectre_v2: Support the combination of guest and host IBRS Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar [dwmw2: Handle 4.9 guest CPUID differences, rename guest_cpu_has_ibrs() → guest_cpu_has_spec_ctrl()] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/cpuid.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 93f924de06cf..a9409f06ddc9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(SPEC_CTRL) | F(ARCH_CAPABILITIES); + F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index d1beb7156704..24187d07c2cf 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -171,7 +171,7 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); } -static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) +static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -179,7 +179,7 @@ static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) if (best && (best->ebx & bit(X86_FEATURE_IBRS))) return true; best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); + return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_RDS))); } static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index eeb8cd3193ea..57b886bf5ac5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3545,7 +3545,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has_ibrs(vcpu)) + !guest_cpuid_has_spec_ctrl(vcpu)) return 1; msr_info->data = svm->spec_ctrl; @@ -3643,7 +3643,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has_ibrs(vcpu)) + !guest_cpuid_has_spec_ctrl(vcpu)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 266db0b2e138..67ed4e9f9e5b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3020,7 +3020,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has_ibrs(vcpu)) + !guest_cpuid_has_spec_ctrl(vcpu)) return 1; msr_info->data = to_vmx(vcpu)->spec_ctrl; @@ -3137,11 +3137,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has_ibrs(vcpu)) + !guest_cpuid_has_spec_ctrl(vcpu)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) return 1; vmx->spec_ctrl = data; From 7a2d2358ba9b6de29be0a98c8290479df32604b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:01:37 +0200 Subject: [PATCH 48/88] x86/speculation: Create spec-ctrl.h to avoid include hell commit 28a2775217b17208811fa43a9e96bd1fdf417b86 upstream Having everything in nospec-branch.h creates a hell of dependencies when adding the prctl based switching mechanism. Move everything which is not required in nospec-branch.h to spec-ctrl.h and fix up the includes in the relevant files. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 14 -------------- arch/x86/include/asm/spec-ctrl.h | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 6 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 arch/x86/include/asm/spec-ctrl.h diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 3a1541cf32de..1119f14bc883 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -228,26 +228,12 @@ enum spectre_v2_mitigation { extern void x86_spec_ctrl_set(u64); extern u64 x86_spec_ctrl_get_default(void); -/* - * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR - * the guest has, while on VMEXIT we restore the host view. This - * would be easier if SPEC_CTRL were architecturally maskable or - * shadowable for guests but this is not (currently) the case. - * Takes the guest view of SPEC_CTRL MSR as a parameter. - */ -extern void x86_spec_ctrl_set_guest(u64); -extern void x86_spec_ctrl_restore_host(u64); - /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, }; -/* AMD specific Speculative Store Bypass MSR data */ -extern u64 x86_amd_ls_cfg_base; -extern u64 x86_amd_ls_cfg_rds_mask; - extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..3ad64420a06e --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter. + */ +extern void x86_spec_ctrl_set_guest(u64); +extern void x86_spec_ctrl_restore_host(u64); + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_rds_mask; + +#endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7551d9ad243a..a176c81a74b2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b3696cc48fd6..46d01fd4c3c8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 57b886bf5ac5..516ddfff8c8b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include "trace.h" diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 67ed4e9f9e5b..0eb3863d3159 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include "trace.h" #include "pmu.h" From 4812ffbbfcac35270b82292e84e8e7187088c8b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:20:11 +0200 Subject: [PATCH 49/88] prctl: Add speculation control prctls commit b617cfc858161140d69cc0b5cc211996b557a1c7 upstream Add two new prctls to control aspects of speculation related vulnerabilites and their mitigations to provide finer grained control over performance impacting mitigations. PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature which is selected with arg2 of prctl(2). The return value uses bit 0-2 with the following meaning: Bit Define Description 0 PR_SPEC_PRCTL Mitigation can be controlled per task by PR_SET_SPECULATION_CTRL 1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is disabled 2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is enabled If all bits are 0 the CPU is not affected by the speculation misfeature. If PR_SPEC_PRCTL is set, then the per task control of the mitigation is available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation misfeature will fail. PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of prctl(2) per task. arg3 is used to hand in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. The common return values are: EINVAL prctl is not implemented by the architecture or the unused prctl() arguments are not 0 ENODEV arg2 is selecting a not supported speculation misfeature PR_SET_SPECULATION_CTRL has these additional return values: ERANGE arg3 is incorrect, i.e. it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE ENXIO prctl control of the selected speculation misfeature is disabled The first supported controlable speculation misfeature is PR_SPEC_STORE_BYPASS. Add the define so this can be shared between architectures. Based on an initial patch from Tim Chen and mostly rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 86 +++++++++++++++++++++++++++++++++++++ include/linux/nospec.h | 5 +++ include/uapi/linux/prctl.h | 11 +++++ kernel/sys.c | 22 ++++++++++ 4 files changed, 124 insertions(+) create mode 100644 Documentation/spec_ctrl.txt diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt new file mode 100644 index 000000000000..ddbebcd01208 --- /dev/null +++ b/Documentation/spec_ctrl.txt @@ -0,0 +1,86 @@ +=================== +Speculation Control +=================== + +Quite some CPUs have speculation related misfeatures which are in fact +vulnerabilites causing data leaks in various forms even accross privilege +domains. + +The kernel provides mitigation for such vulnerabilities in various +forms. Some of these mitigations are compile time configurable and some on +the kernel command line. + +There is also a class of mitigations which are very expensive, but they can +be restricted to a certain set of processes or tasks in controlled +environments. The mechanism to control these mitigations is via +:manpage:`prctl(2)`. + +There are two prctl options which are related to this: + + * PR_GET_SPECULATION_CTRL + + * PR_SET_SPECULATION_CTRL + +PR_GET_SPECULATION_CTRL +----------------------- + +PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature +which is selected with arg2 of prctl(2). The return value uses bits 0-2 with +the following meaning: + +==== ================ =================================================== +Bit Define Description +==== ================ =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled +==== ================ =================================================== + +If all bits are 0 the CPU is not affected by the speculation misfeature. + +If PR_SPEC_PRCTL is set, then the per task control of the mitigation is +available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation +misfeature will fail. + +PR_SET_SPECULATION_CTRL +----------------------- +PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which +is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand +in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. + +Common error codes +------------------ +======= ================================================================= +Value Meaning +======= ================================================================= +EINVAL The prctl is not implemented by the architecture or unused + prctl(2) arguments are not 0 + +ENODEV arg2 is selecting a not supported speculation misfeature +======= ================================================================= + +PR_SET_SPECULATION_CTRL error codes +----------------------------------- +======= ================================================================= +Value Meaning +======= ================================================================= +0 Success + +ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor + PR_SPEC_DISABLE + +ENXIO Control of the selected speculation misfeature is not possible. + See PR_GET_SPECULATION_CTRL. +======= ================================================================= + +Speculation misfeature controls +------------------------------- +- PR_SPEC_STORE_BYPASS: Speculative Store Bypass + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); diff --git a/include/linux/nospec.h b/include/linux/nospec.h index e791ebc65c9c..700bb8a4e4ea 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, \ (typeof(_i)) (_i & _mask); \ }) + +/* Speculation control prctl */ +int arch_prctl_spec_ctrl_get(unsigned long which); +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); + #endif /* _LINUX_NOSPEC_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index a8d0759a9e40..3b316be71c56 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -197,4 +197,15 @@ struct prctl_mm_map { # define PR_CAP_AMBIENT_LOWER 3 # define PR_CAP_AMBIENT_CLEAR_ALL 4 +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 89d5be418157..312c985c4643 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -53,6 +53,8 @@ #include #include +#include + #include /* Move somewhere else to avoid recompiling? */ #include @@ -2072,6 +2074,16 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif +int __weak arch_prctl_spec_ctrl_get(unsigned long which) +{ + return -EINVAL; +} + +int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +{ + return -EINVAL; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2270,6 +2282,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; + case PR_GET_SPECULATION_CTRL: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_get(arg2); + break; + case PR_SET_SPECULATION_CTRL: + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_set(arg2, arg3); + break; default: error = -EINVAL; break; From fd01e82efa269b7e295533ec7b2d93aa8adf670a Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 14 Feb 2017 00:11:02 -0800 Subject: [PATCH 50/88] x86/process: Optimize TIF checks in __switch_to_xtra() commit af8b3cd3934ec60f4c2a420d19a9d416554f140b upstream Help the compiler to avoid reevaluating the thread flags for each checked bit by reordering the bit checks and providing an explicit xor for evaluation. With default defconfigs for each arch, x86_64: arch/x86/kernel/process.o text data bss dec hex 3056 8577 16 11649 2d81 Before 3024 8577 16 11617 2d61 After i386: arch/x86/kernel/process.o text data bss dec hex 2957 8673 8 11638 2d76 Before 2925 8673 8 11606 2d56 After Originally-by: Thomas Gleixner Signed-off-by: Kyle Huey Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com Signed-off-by: Thomas Gleixner [dwmw2: backported to make TIF_RDS handling simpler. No deferred TR reload.] Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 66 ++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index a55b32007785..0e1999e99470 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -192,48 +192,56 @@ int set_tsc_mode(unsigned int val) return 0; } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +static inline void switch_to_bitmap(struct tss_struct *tss, + struct thread_struct *prev, + struct thread_struct *next, + unsigned long tifp, unsigned long tifn) { - struct thread_struct *prev, *next; - - prev = &prev_p->thread; - next = &next_p->thread; - - if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ - test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) - debugctl |= DEBUGCTLMSR_BTF; - - update_debugctlmsr(debugctl); - } - - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } - - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. * Normally this is 128 bytes or less: */ memcpy(tss->io_bitmap, next->io_bitmap_ptr, max(prev->io_bitmap_max, next->io_bitmap_max)); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { + } else if (tifp & _TIF_IO_BITMAP) { /* * Clear any possible leftover bits: */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } +} + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long tifp, tifn; + + prev = &prev_p->thread; + next = &next_p->thread; + + tifn = READ_ONCE(task_thread_info(next_p)->flags); + tifp = READ_ONCE(task_thread_info(prev_p)->flags); + switch_to_bitmap(tss, prev, next, tifp, tifn); + propagate_user_return_notify(prev_p, next_p); + + if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + if (tifn & _TIF_BLOCKSTEP) + debugctl |= DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + } + + if ((tifp ^ tifn) & _TIF_NOTSC) { + if (tifn & _TIF_NOTSC) + hard_disable_TSC(); + else + hard_enable_TSC(); + } } /* From 439f2ef884306976f22b42f709c1ccdf04278987 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 14 Feb 2017 00:11:03 -0800 Subject: [PATCH 51/88] x86/process: Correct and optimize TIF_BLOCKSTEP switch commit b9894a2f5bd18b1691cb6872c9afe32b148d0132 upstream The debug control MSR is "highly magical" as the blockstep bit can be cleared by hardware under not well documented circumstances. So a task switch relying on the bit set by the previous task (according to the previous tasks thread flags) can trip over this and not update the flag for the next task. To fix this its required to handle DEBUGCTLMSR_BTF when either the previous or the next or both tasks have the TIF_BLOCKSTEP flag set. While at it avoid branching within the TIF_BLOCKSTEP case and evaluating boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR. x86_64: arch/x86/kernel/process.o text data bss dec hex 3024 8577 16 11617 2d61 Before 3008 8577 16 11601 2d51 After i386: No change [ tglx: Made the shift value explicit, use a local variable to make the code readable and massaged changelog] Originally-by: Thomas Gleixner Signed-off-by: Kyle Huey Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/process.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 87103e8f4785..076e86838245 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -141,6 +141,7 @@ /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0e1999e99470..496eef653d23 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -227,13 +227,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, propagate_user_return_notify(prev_p, next_p); - if ((tifp ^ tifn) & _TIF_BLOCKSTEP) { - unsigned long debugctl = get_debugctlmsr(); + if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && + arch_has_block_step()) { + unsigned long debugctl, msk; + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl &= ~DEBUGCTLMSR_BTF; - if (tifn & _TIF_BLOCKSTEP) - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); + msk = tifn & _TIF_BLOCKSTEP; + debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } if ((tifp ^ tifn) & _TIF_NOTSC) { From 5ed7788df973455378e987fe221bef0661fbe03a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Feb 2017 00:11:04 -0800 Subject: [PATCH 52/88] x86/process: Optimize TIF_NOTSC switch commit 5a920155e388ec22a22e0532fb695b9215c9b34d upstream Provide and use a toggle helper instead of doing it with a branch. x86_64: arch/x86/kernel/process.o text data bss dec hex 3008 8577 16 11601 2d51 Before 2976 8577 16 11569 2d31 After i386: arch/x86/kernel/process.o text data bss dec hex 2925 8673 8 11606 2d56 Before 2893 8673 8 11574 2d36 After Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170214081104.9244-4-khuey@kylehuey.com Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 10 ++++++++++ arch/x86/kernel/process.c | 22 ++++------------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 99185a064978..686a58d793e5 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -111,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask) } } +static inline void cr4_toggle_bits(unsigned long mask) +{ + unsigned long cr4; + + cr4 = this_cpu_read(cpu_tlbstate.cr4); + cr4 ^= mask; + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Read the CR4 shadow. */ static inline unsigned long cr4_read_shadow(void) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 496eef653d23..b7e3822238ad 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -134,11 +134,6 @@ void flush_thread(void) fpu__clear(&tsk->thread.fpu); } -static void hard_disable_TSC(void) -{ - cr4_set_bits(X86_CR4_TSD); -} - void disable_TSC(void) { preempt_disable(); @@ -147,15 +142,10 @@ void disable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_disable_TSC(); + cr4_set_bits(X86_CR4_TSD); preempt_enable(); } -static void hard_enable_TSC(void) -{ - cr4_clear_bits(X86_CR4_TSD); -} - static void enable_TSC(void) { preempt_disable(); @@ -164,7 +154,7 @@ static void enable_TSC(void) * Must flip the CPU state synchronously with * TIF_NOTSC in the current running context. */ - hard_enable_TSC(); + cr4_clear_bits(X86_CR4_TSD); preempt_enable(); } @@ -238,12 +228,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } - if ((tifp ^ tifn) & _TIF_NOTSC) { - if (tifn & _TIF_NOTSC) - hard_disable_TSC(); - else - hard_enable_TSC(); - } + if ((tifp ^ tifn) & _TIF_NOTSC) + cr4_toggle_bits(X86_CR4_TSD); } /* From 89c6e9b599c573802de1b2fff6a9ccd99c3c4e57 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:21:42 +0200 Subject: [PATCH 53/88] x86/process: Allow runtime control of Speculative Store Bypass commit 885f82bfbc6fefb6664ea27965c3ab9ac4194b8c upstream The Speculative Store Bypass vulnerability can be mitigated with the Reduced Data Speculation (RDS) feature. To allow finer grained control of this eventually expensive mitigation a per task mitigation control is required. Add a new TIF_RDS flag and put it into the group of TIF flags which are evaluated for mismatch in switch_to(). If these bits differ in the previous and the next task, then the slow path function __switch_to_xtra() is invoked. Implement the TIF_RDS dependent mitigation control in the slow path. If the prctl for controlling Speculative Store Bypass is disabled or no task uses the prctl then there is no overhead in the switch_to() fast path. Update the KVM related speculation control functions to take TID_RDS into account as well. Based on a patch from Tim Chen. Completely rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 3 ++- arch/x86/include/asm/spec-ctrl.h | 17 +++++++++++++++++ arch/x86/include/asm/thread_info.h | 6 ++++-- arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++++----- arch/x86/kernel/process.c | 22 ++++++++++++++++++++++ 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 076e86838245..5dd28d0e9f0e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -40,7 +40,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -#define SPEC_CTRL_RDS (1 << 2) /* Reduced Data Speculation */ +#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ +#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 3ad64420a06e..45ef00ad5105 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_SPECCTRL_H_ #define _ASM_X86_SPECCTRL_H_ +#include #include /* @@ -18,4 +19,20 @@ extern void x86_spec_ctrl_restore_host(u64); extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_rds_mask; +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + +static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); + return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); +} + +static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; +} + +extern void speculative_store_bypass_update(void); + #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89978b9c667a..661afac34502 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_RDS 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -104,8 +105,9 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_RDS (1 << TIF_RDS) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -139,7 +141,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46d01fd4c3c8..4f0957600076 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,7 +32,7 @@ static void __init ssb_select_mitigation(void); * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any * writes to SPEC_CTRL contain whatever reserved bits have been set. */ -static u64 __ro_after_init x86_spec_ctrl_base; +u64 __ro_after_init x86_spec_ctrl_base; /* * The vendor and possibly platform specific bits which can be modified in @@ -139,25 +139,41 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); u64 x86_spec_ctrl_get_default(void) { - return x86_spec_ctrl_base; + u64 msrval = x86_spec_ctrl_base; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + return msrval; } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) { + u64 host = x86_spec_ctrl_base; + if (!boot_cpu_has(X86_FEATURE_IBRS)) return; - if (x86_spec_ctrl_base != guest_spec_ctrl) + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) { + u64 host = x86_spec_ctrl_base; + if (!boot_cpu_has(X86_FEATURE_IBRS)) return; - if (x86_spec_ctrl_base != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + + if (host != guest_spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, host); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b7e3822238ad..9c48e18d4aeb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -33,6 +33,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -202,6 +203,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + u64 msr; + + if (static_cpu_has(X86_FEATURE_AMD_RDS)) { + msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + } else { + msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } +} + +void speculative_store_bypass_update(void) +{ + __speculative_store_bypass_update(current_thread_info()->flags); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -230,6 +249,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOTSC) cr4_toggle_bits(X86_CR4_TSD); + + if ((tifp ^ tifn) & _TIF_RDS) + __speculative_store_bypass_update(tifn); } /* From a078e3e81964c31079627dd32c3ea714d5b1531e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:26:40 +0200 Subject: [PATCH 54/88] x86/speculation: Add prctl for Speculative Store Bypass mitigation commit a73ec77ee17ec556fe7f165d00314cb7c047b1ac upstream Add prctl based control for Speculative Store Bypass mitigation and make it the default mitigation for Intel and AMD. Andi Kleen provided the following rationale (slightly redacted): There are multiple levels of impact of Speculative Store Bypass: 1) JITed sandbox. It cannot invoke system calls, but can do PRIME+PROBE and may have call interfaces to other code 2) Native code process. No protection inside the process at this level. 3) Kernel. 4) Between processes. The prctl tries to protect against case (1) doing attacks. If the untrusted code can do random system calls then control is already lost in a much worse way. So there needs to be system call protection in some way (using a JIT not allowing them or seccomp). Or rather if the process can subvert its environment somehow to do the prctl it can already execute arbitrary code, which is much worse than SSB. To put it differently, the point of the prctl is to not allow JITed code to read data it shouldn't read from its JITed sandbox. If it already has escaped its sandbox then it can already read everything it wants in its address space, and do much worse. The ability to control Speculative Store Bypass allows to enable the protection selectively without affecting overall system performance. Based on an initial patch from Tim Chen. Completely rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 6 +- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 83 ++++++++++++++++++++++++---- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 792ac917411a..543923b35260 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4001,7 +4001,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. off - Unconditionally enable Speculative Store Bypass auto - Kernel detects whether the CPU model contains an implementation of Speculative Store Bypass and - picks the most appropriate mitigation + picks the most appropriate mitigation. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. Not specifying this option is equivalent to spec_store_bypass_disable=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1119f14bc883..71ad01422655 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ extern u64 x86_spec_ctrl_get_default(void); enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, }; extern char __indirect_thunk_start[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4f0957600076..b7d9adf22585 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -411,20 +413,23 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_NONE, SPEC_STORE_BYPASS_CMD_AUTO, SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, }; static const char *ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", - [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled" + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" }; static const struct { const char *option; enum ssb_mitigation_cmd cmd; } ssb_mitigation_options[] = { - { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ - { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ - { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ }; static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) @@ -474,14 +479,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: - /* - * AMD platforms by default don't need SSB mitigation. - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - break; + /* Choose prctl as the default mode */ + mode = SPEC_STORE_BYPASS_PRCTL; + break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; case SPEC_STORE_BYPASS_CMD_NONE: break; } @@ -492,7 +498,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation */ - if (mode != SPEC_STORE_BYPASS_NONE) { + if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses @@ -523,6 +529,63 @@ static void ssb_select_mitigation() #undef pr_fmt +static int ssb_prctl_set(unsigned long ctrl) +{ + bool rds = !!test_tsk_thread_flag(current, TIF_RDS); + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + return -ENXIO; + + if (ctrl == PR_SPEC_ENABLE) + clear_tsk_thread_flag(current, TIF_RDS); + else + set_tsk_thread_flag(current, TIF_RDS); + + if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) + speculative_store_bypass_update(); + + return 0; +} + +static int ssb_prctl_get(void) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_PRCTL: + if (test_tsk_thread_flag(current, TIF_RDS)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +{ + if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) + return -ERANGE; + + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(ctrl); + default: + return -ENODEV; + } +} + +int arch_prctl_spec_ctrl_get(unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(); + default: + return -ENODEV; + } +} + void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_IBRS)) From 4272f528da381673a8e7845c93daa88b8aa4f4e9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 1 May 2018 15:19:04 -0700 Subject: [PATCH 55/88] nospec: Allow getting/setting on non-current task commit 7bbf1373e228840bb0295a2ca26d548ef37f448e upstream Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than current. This is needed both for /proc/$pid/status queries and for seccomp (since thread-syncing can trigger seccomp in non-current threads). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 27 ++++++++++++++++----------- include/linux/nospec.h | 7 +++++-- kernel/sys.c | 9 +++++---- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b7d9adf22585..3760931cd40a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -529,31 +529,35 @@ static void ssb_select_mitigation() #undef pr_fmt -static int ssb_prctl_set(unsigned long ctrl) +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { - bool rds = !!test_tsk_thread_flag(current, TIF_RDS); + bool rds = !!test_tsk_thread_flag(task, TIF_RDS); if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) return -ENXIO; if (ctrl == PR_SPEC_ENABLE) - clear_tsk_thread_flag(current, TIF_RDS); + clear_tsk_thread_flag(task, TIF_RDS); else - set_tsk_thread_flag(current, TIF_RDS); + set_tsk_thread_flag(task, TIF_RDS); - if (rds != !!test_tsk_thread_flag(current, TIF_RDS)) + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ + if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) speculative_store_bypass_update(); return 0; } -static int ssb_prctl_get(void) +static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_PRCTL: - if (test_tsk_thread_flag(current, TIF_RDS)) + if (test_tsk_thread_flag(task, TIF_RDS)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; default: @@ -563,24 +567,25 @@ static int ssb_prctl_get(void) } } -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) { if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) return -ERANGE; switch (which) { case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(ctrl); + return ssb_prctl_set(task, ctrl); default: return -ENODEV; } } -int arch_prctl_spec_ctrl_get(unsigned long which) +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: - return ssb_prctl_get(); + return ssb_prctl_get(task); default: return -ENODEV; } diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 700bb8a4e4ea..a908c954484d 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -7,6 +7,8 @@ #define _LINUX_NOSPEC_H #include +struct task_struct; + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, }) /* Speculation control prctl */ -int arch_prctl_spec_ctrl_get(unsigned long which); -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); #endif /* _LINUX_NOSPEC_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 312c985c4643..143cd63f1d47 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2074,12 +2074,13 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif -int __weak arch_prctl_spec_ctrl_get(unsigned long which) +int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) { return -EINVAL; } -int __weak arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl) +int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, + unsigned long ctrl) { return -EINVAL; } @@ -2285,12 +2286,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_prctl_spec_ctrl_get(arg2); + error = arch_prctl_spec_ctrl_get(me, arg2); break; case PR_SET_SPECULATION_CTRL: if (arg4 || arg5) return -EINVAL; - error = arch_prctl_spec_ctrl_set(arg2, arg3); + error = arch_prctl_spec_ctrl_set(me, arg2, arg3); break; default: error = -EINVAL; From 51ef9af2a35bbc21334c801fd15cbfe01210760f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 1 May 2018 15:31:45 -0700 Subject: [PATCH 56/88] proc: Provide details on speculation flaw mitigations commit fae1fa0fc6cca8beee3ab8ed71d54f9a78fa3f64 upstream As done with seccomp and no_new_privs, also show speculation flaw mitigation state in /proc/$pid/status. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 794b52a6c20d..64f3f2084f07 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -345,8 +346,29 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) { #ifdef CONFIG_SECCOMP seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); - seq_putc(m, '\n'); #endif + seq_printf(m, "\nSpeculation Store Bypass:\t"); + switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { + case -EINVAL: + seq_printf(m, "unknown"); + break; + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_ENABLE: + seq_printf(m, "thread vulnerable"); + break; + case PR_SPEC_DISABLE: + seq_printf(m, "globally mitigated"); + break; + default: + seq_printf(m, "vulnerable"); + break; + } + seq_putc(m, '\n'); } static inline void task_context_switch_counts(struct seq_file *m, From 0a112f104548667f5618477ff0f2a54ee626addd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 1 May 2018 15:07:31 -0700 Subject: [PATCH 57/88] seccomp: Enable speculation flaw mitigations commit 5c3070890d06ff82eecb808d02d2ca39169533ef upstream When speculation flaw mitigations are opt-in (via prctl), using seccomp will automatically opt-in to these protections, since using seccomp indicates at least some level of sandboxing is desired. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index af182a6df25b..1d3078b45a70 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -214,6 +216,19 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } +/* + * If a given speculation mitigation is opt-in (prctl()-controlled), + * select it, by disabling speculation (enabling mitigation). + */ +static inline void spec_mitigate(struct task_struct *task, + unsigned long which) +{ + int state = arch_prctl_spec_ctrl_get(task, which); + + if (state > 0 && (state & PR_SPEC_PRCTL)) + arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); +} + static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode) { @@ -225,6 +240,8 @@ static inline void seccomp_assign_mode(struct task_struct *task, * filter) is set. */ smp_mb__before_atomic(); + /* Assume seccomp processes want speculation flaw mitigation. */ + spec_mitigate(task, PR_SPEC_STORE_BYPASS); set_tsk_thread_flag(task, TIF_SECCOMP); } From ea055f7d43fb3a9d56e80d0116104555d6dde3f7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 May 2018 15:03:30 -0700 Subject: [PATCH 58/88] x86/bugs: Make boot modes __ro_after_init commit f9544b2b076ca90d887c5ae5d74fab4c21bb7c13 upstream There's no reason for these to be changed after boot. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3760931cd40a..65114d2edea8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -128,7 +128,8 @@ static const char *spectre_v2_strings[] = { #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; void x86_spec_ctrl_set(u64 val) { @@ -406,7 +407,7 @@ retpoline_auto: #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt -static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE; +static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; /* The kernel command line selection */ enum ssb_mitigation_cmd { From 036608d62a838aeb63cae0adaf8ac773cb53148c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 May 2018 22:09:15 +0200 Subject: [PATCH 59/88] prctl: Add force disable speculation commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream For certain use cases it is desired to enforce mitigations so they cannot be undone afterwards. That's important for loader stubs which want to prevent a child from disabling the mitigation again. Will also be used for seccomp(). The extra state preserving of the prctl state for SSB is a preparatory step for EBPF dymanic speculation control. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 34 +++++++++++++++++++++------------- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++---------- fs/proc/array.c | 3 +++ include/linux/sched.h | 9 +++++++++ include/uapi/linux/prctl.h | 1 + 5 files changed, 59 insertions(+), 23 deletions(-) diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt index ddbebcd01208..1b3690d30943 100644 --- a/Documentation/spec_ctrl.txt +++ b/Documentation/spec_ctrl.txt @@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL ----------------------- PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature -which is selected with arg2 of prctl(2). The return value uses bits 0-2 with +which is selected with arg2 of prctl(2). The return value uses bits 0-3 with the following meaning: -==== ================ =================================================== -Bit Define Description -==== ================ =================================================== -0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL -1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled -2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled -==== ================ =================================================== +==== ===================== =================================================== +Bit Define Description +==== ===================== =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled +3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A + subsequent prctl(..., PR_SPEC_ENABLE) will fail. +==== ===================== =================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. @@ -47,9 +49,11 @@ misfeature will fail. PR_SET_SPECULATION_CTRL ----------------------- + PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand -in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. +in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or +PR_SPEC_FORCE_DISABLE. Common error codes ------------------ @@ -70,10 +74,13 @@ Value Meaning 0 Success ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor - PR_SPEC_DISABLE + PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE ENXIO Control of the selected speculation misfeature is not possible. See PR_GET_SPECULATION_CTRL. + +EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller + tried to enable it again. ======= ================================================================= Speculation misfeature controls @@ -84,3 +91,4 @@ Speculation misfeature controls * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 65114d2edea8..fdbd8e54c549 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -532,21 +532,37 @@ static void ssb_select_mitigation() static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { - bool rds = !!test_tsk_thread_flag(task, TIF_RDS); + bool update; if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) return -ENXIO; - if (ctrl == PR_SPEC_ENABLE) - clear_tsk_thread_flag(task, TIF_RDS); - else - set_tsk_thread_flag(task, TIF_RDS); + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + update = test_and_clear_tsk_thread_flag(task, TIF_RDS); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + break; + default: + return -ERANGE; + } /* * If being set on non-current task, delay setting the CPU * mitigation until it is next scheduled. */ - if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS)) + if (task == current && update) speculative_store_bypass_update(); return 0; @@ -558,7 +574,9 @@ static int ssb_prctl_get(struct task_struct *task) case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; case SPEC_STORE_BYPASS_PRCTL: - if (test_tsk_thread_flag(task, TIF_RDS)) + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; default: @@ -571,9 +589,6 @@ static int ssb_prctl_get(struct task_struct *task) int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { - if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE) - return -ERANGE; - switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); diff --git a/fs/proc/array.c b/fs/proc/array.c index 64f3f2084f07..3e37195485fc 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -355,6 +355,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) case PR_SPEC_NOT_AFFECTED: seq_printf(m, "not vulnerable"); break; + case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: + seq_printf(m, "thread force mitigated"); + break; case PR_SPEC_PRCTL | PR_SPEC_DISABLE: seq_printf(m, "thread mitigated"); break; diff --git a/include/linux/sched.h b/include/linux/sched.h index c549c8c9245c..5ebef8c86c26 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2354,6 +2354,8 @@ static inline void memalloc_noio_restore(unsigned int flags) #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ +#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */ +#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/ #define TASK_PFA_TEST(name, func) \ @@ -2380,6 +2382,13 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + /* * task->jobctl flags */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 3b316be71c56..64776b72e1eb 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -207,5 +207,6 @@ struct prctl_mm_map { # define PR_SPEC_PRCTL (1UL << 0) # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) #endif /* _LINUX_PRCTL_H */ From c71def81cd07e1bd74da468ae6abe1ce62e3157b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 May 2018 09:40:03 +0200 Subject: [PATCH 60/88] seccomp: Use PR_SPEC_FORCE_DISABLE commit b849a812f7eb92e96d1c8239b06581b2cfd8b275 upstream Use PR_SPEC_FORCE_DISABLE in seccomp() because seccomp does not allow to widen restrictions. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1d3078b45a70..a0bd6ea1ff90 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -226,7 +226,7 @@ static inline void spec_mitigate(struct task_struct *task, int state = arch_prctl_spec_ctrl_get(task, which); if (state > 0 && (state & PR_SPEC_PRCTL)) - arch_prctl_spec_ctrl_set(task, which, PR_SPEC_DISABLE); + arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); } static inline void seccomp_assign_mode(struct task_struct *task, From ab677c2addbb128f334c4906f27a0285a67d2180 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 May 2018 14:56:12 -0700 Subject: [PATCH 61/88] seccomp: Add filter flag to opt-out of SSB mitigation commit 00a02d0c502a06d15e07b857f8ff921e3e402675 upstream If a seccomp user is not interested in Speculative Store Bypass mitigation by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when adding filters. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/seccomp.h | 3 +- include/uapi/linux/seccomp.h | 4 +- kernel/seccomp.c | 19 +++-- tools/testing/selftests/seccomp/seccomp_bpf.c | 78 ++++++++++++++++++- 4 files changed, 93 insertions(+), 11 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index ecc296c137cd..50c460a956f1 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_SPEC_ALLOW) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..e4acb615792b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -15,7 +15,9 @@ #define SECCOMP_SET_MODE_FILTER 1 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */ +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) /* * All BPF programs must return a 32-bit value. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index a0bd6ea1ff90..62a60e7454a9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -230,7 +230,8 @@ static inline void spec_mitigate(struct task_struct *task, } static inline void seccomp_assign_mode(struct task_struct *task, - unsigned long seccomp_mode) + unsigned long seccomp_mode, + unsigned long flags) { assert_spin_locked(&task->sighand->siglock); @@ -240,8 +241,9 @@ static inline void seccomp_assign_mode(struct task_struct *task, * filter) is set. */ smp_mb__before_atomic(); - /* Assume seccomp processes want speculation flaw mitigation. */ - spec_mitigate(task, PR_SPEC_STORE_BYPASS); + /* Assume default seccomp processes want spec flaw mitigation. */ + if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) + spec_mitigate(task, PR_SPEC_STORE_BYPASS); set_tsk_thread_flag(task, TIF_SECCOMP); } @@ -309,7 +311,7 @@ static inline pid_t seccomp_can_sync_threads(void) * without dropping the locks. * */ -static inline void seccomp_sync_threads(void) +static inline void seccomp_sync_threads(unsigned long flags) { struct task_struct *thread, *caller; @@ -350,7 +352,8 @@ static inline void seccomp_sync_threads(void) * allow one thread to transition the other. */ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) - seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER, + flags); } } @@ -469,7 +472,7 @@ static long seccomp_attach_filter(unsigned int flags, /* Now that the new filter is in place, synchronize to all threads. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC) - seccomp_sync_threads(); + seccomp_sync_threads(flags); return 0; } @@ -729,7 +732,7 @@ static long seccomp_set_mode_strict(void) #ifdef TIF_NOTSC disable_TSC(); #endif - seccomp_assign_mode(current, seccomp_mode); + seccomp_assign_mode(current, seccomp_mode, 0); ret = 0; out: @@ -787,7 +790,7 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Do not free the successfully attached filter. */ prepared = NULL; - seccomp_assign_mode(current, seccomp_mode); + seccomp_assign_mode(current, seccomp_mode, flags); out: spin_unlock_irq(¤t->sighand->siglock); if (flags & SECCOMP_FILTER_FLAG_TSYNC) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index f68998149351..d5be7b5ff899 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1692,7 +1692,11 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef seccomp @@ -1791,6 +1795,78 @@ TEST(seccomp_syscall_mode_lock) } } +/* + * Test detection of known and unknown filter flags. Userspace needs to be able + * to check if a filter flag is supported by the current kernel and a good way + * of doing that is by attempting to enter filter mode, with the flag bit in + * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates + * that the flag is valid and EINVAL indicates that the flag is invalid. + */ +TEST(detect_seccomp_filter_flags) +{ + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; + unsigned int flag, all_flags; + int i; + long ret; + + /* Test detection of known-good filter flags */ + for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", + flag); + } + + all_flags |= flag; + } + + /* Test detection of all known-good filter flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + all_flags); + } + + /* Test detection of an unknown filter flag */ + flag = -1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", + flag); + } + + /* + * Test detection of an unknown filter flag that may simply need to be + * added to this test + */ + flag = flags[ARRAY_SIZE(flags) - 1] << 1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", + flag); + } +} + TEST(TSYNC_first) { struct sock_filter filter[] = { From 094c2767c4f02c36eabc27309d78b04f4a216e88 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 May 2018 15:12:06 +0200 Subject: [PATCH 62/88] seccomp: Move speculation migitation control to arch code commit 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc upstream The migitation control is simpler to implement in architecture code as it avoids the extra function call to check the mode. Aside of that having an explicit seccomp enabled mode in the architecture mitigations would require even more workarounds. Move it into architecture code and provide a weak function in the seccomp code. Remove the 'which' argument as this allows the architecture to decide which mitigations are relevant for seccomp. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 29 ++++++++++++++++++----------- include/linux/nospec.h | 2 ++ kernel/seccomp.c | 15 ++------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fdbd8e54c549..131617dffe21 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -568,6 +568,24 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_SECCOMP +void arch_seccomp_spec_mitigate(struct task_struct *task) +{ + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); +} +#endif + static int ssb_prctl_get(struct task_struct *task) { switch (ssb_mode) { @@ -586,17 +604,6 @@ static int ssb_prctl_get(struct task_struct *task) } } -int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssb_prctl_set(task, ctrl); - default: - return -ENODEV; - } -} - int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { diff --git a/include/linux/nospec.h b/include/linux/nospec.h index a908c954484d..0c5ef54fd416 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl); +/* Speculation control for seccomp enforced mitigation */ +void arch_seccomp_spec_mitigate(struct task_struct *task); #endif /* _LINUX_NOSPEC_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 62a60e7454a9..3975856d476c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -216,18 +216,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } -/* - * If a given speculation mitigation is opt-in (prctl()-controlled), - * select it, by disabling speculation (enabling mitigation). - */ -static inline void spec_mitigate(struct task_struct *task, - unsigned long which) -{ - int state = arch_prctl_spec_ctrl_get(task, which); - - if (state > 0 && (state & PR_SPEC_PRCTL)) - arch_prctl_spec_ctrl_set(task, which, PR_SPEC_FORCE_DISABLE); -} +void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode, @@ -243,7 +232,7 @@ static inline void seccomp_assign_mode(struct task_struct *task, smp_mb__before_atomic(); /* Assume default seccomp processes want spec flaw mitigation. */ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) - spec_mitigate(task, PR_SPEC_STORE_BYPASS); + arch_seccomp_spec_mitigate(task); set_tsk_thread_flag(task, TIF_SECCOMP); } From 05a85a396f3989e9ac953785d9dccfc7cd0110f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 May 2018 14:37:54 -0700 Subject: [PATCH 63/88] x86/speculation: Make "seccomp" the default mode for Speculative Store Bypass commit f21b53b20c754021935ea43364dbf53778eeba32 upstream Unless explicitly opted out of, anything running under seccomp will have SSB mitigations enabled. Choosing the "prctl" mode will disable this. [ tglx: Adjusted it to the new arch_seccomp_spec_mitigate() mechanism ] Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 26 ++++++++++++++-------- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 32 ++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 543923b35260..52240a63132e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3997,19 +3997,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This parameter controls whether the Speculative Store Bypass optimization is used. - on - Unconditionally disable Speculative Store Bypass - off - Unconditionally enable Speculative Store Bypass - auto - Kernel detects whether the CPU model contains an - implementation of Speculative Store Bypass and - picks the most appropriate mitigation. - prctl - Control Speculative Store Bypass per thread - via prctl. Speculative Store Bypass is enabled - for a process by default. The state of the control - is inherited on fork. + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation. If the + CPU is not vulnerable, "off" is selected. If the + CPU is vulnerable the default mitigation is + architecture and Kconfig dependent. See below. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. + seccomp - Same as "prctl" above, but all seccomp threads + will disable SSB unless they explicitly opt out. Not specifying this option is equivalent to spec_store_bypass_disable=auto. + Default mitigations: + X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 71ad01422655..328ea3cb769f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -233,6 +233,7 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, }; extern char __indirect_thunk_start[]; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 131617dffe21..9a3bb658d6dc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -415,22 +415,25 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_AUTO, SPEC_STORE_BYPASS_CMD_ON, SPEC_STORE_BYPASS_CMD_PRCTL, + SPEC_STORE_BYPASS_CMD_SECCOMP, }; static const char *ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", - [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl" + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", + [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", }; static const struct { const char *option; enum ssb_mitigation_cmd cmd; } ssb_mitigation_options[] = { - { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ - { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ - { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ - { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ }; static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) @@ -480,8 +483,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) switch (cmd) { case SPEC_STORE_BYPASS_CMD_AUTO: - /* Choose prctl as the default mode */ - mode = SPEC_STORE_BYPASS_PRCTL; + case SPEC_STORE_BYPASS_CMD_SECCOMP: + /* + * Choose prctl+seccomp as the default mode if seccomp is + * enabled. + */ + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPEC_STORE_BYPASS_SECCOMP; + else + mode = SPEC_STORE_BYPASS_PRCTL; break; case SPEC_STORE_BYPASS_CMD_ON: mode = SPEC_STORE_BYPASS_DISABLE; @@ -529,12 +539,14 @@ static void ssb_select_mitigation() } #undef pr_fmt +#define pr_fmt(fmt) "Speculation prctl: " fmt static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) { bool update; - if (ssb_mode != SPEC_STORE_BYPASS_PRCTL) + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; switch (ctrl) { @@ -582,7 +594,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, #ifdef CONFIG_SECCOMP void arch_seccomp_spec_mitigate(struct task_struct *task) { - ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -591,6 +604,7 @@ static int ssb_prctl_get(struct task_struct *task) switch (ssb_mode) { case SPEC_STORE_BYPASS_DISABLE: return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_SECCOMP: case SPEC_STORE_BYPASS_PRCTL: if (task_spec_ssb_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; From bf3da841edae882de545d2d19b1fae205cab8d98 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 9 May 2018 21:41:38 +0200 Subject: [PATCH 64/88] x86/bugs: Rename _RDS to _SSBD commit 9f65fb29374ee37856dbad847b4e121aab72b510 upstream Intel collateral will reference the SSB mitigation bit in IA32_SPEC_CTL[2] as SSBD (Speculative Store Bypass Disable). Hence changing it. It is unclear yet what the MSR_IA32_ARCH_CAPABILITIES (0x10a) Bit(4) name is going to be. Following the rename it would be SSBD_NO but that rolls out to Speculative Store Bypass Disable No. Also fixed the missing space in X86_FEATURE_AMD_SSBD. [ tglx: Fixup x86_amd_rds_enable() and rds_tif_to_amd_ls_cfg() as well ] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 4 ++-- arch/x86/include/asm/msr-index.h | 10 ++++----- arch/x86/include/asm/spec-ctrl.h | 12 +++++----- arch/x86/include/asm/thread_info.h | 6 ++--- arch/x86/kernel/cpu/amd.c | 14 ++++++------ arch/x86/kernel/cpu/bugs.c | 36 +++++++++++++++--------------- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/process.c | 8 +++---- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/cpuid.h | 2 +- arch/x86/kvm/vmx.c | 2 +- 12 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 87970694cac1..0ed8ea5419c1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -205,7 +205,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_RDS (7*32+24) /* "" AMD RDS implementation */ +#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -308,7 +308,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_RDS (18*32+31) /* Reduced Data Speculation */ +#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5dd28d0e9f0e..b67d57eb4d0f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -40,8 +40,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ -#define SPEC_CTRL_RDS_SHIFT 2 /* Reduced Data Speculation bit */ -#define SPEC_CTRL_RDS (1 << SPEC_CTRL_RDS_SHIFT) /* Reduced Data Speculation */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -63,10 +63,10 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_RDS_NO (1 << 4) /* +#define ARCH_CAP_SSBD_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass - * attack, so no Reduced Data Speculation control - * required. + * attack, so no Speculative Store Bypass + * control required. */ #define MSR_IA32_BBL_CR_CTL 0x00000119 diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 45ef00ad5105..dc21209790bf 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -17,20 +17,20 @@ extern void x86_spec_ctrl_restore_host(u64); /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; -extern u64 x86_amd_ls_cfg_rds_mask; +extern u64 x86_amd_ls_cfg_ssbd_mask; /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -static inline u64 rds_tif_to_spec_ctrl(u64 tifn) +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { - BUILD_BUG_ON(TIF_RDS < SPEC_CTRL_RDS_SHIFT); - return (tifn & _TIF_RDS) >> (TIF_RDS - SPEC_CTRL_RDS_SHIFT); + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } -static inline u64 rds_tif_to_amd_ls_cfg(u64 tifn) +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { - return (tifn & _TIF_RDS) ? x86_amd_ls_cfg_rds_mask : 0ULL; + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } extern void speculative_store_bypass_update(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 661afac34502..2d8788a59b4d 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_RDS 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -107,7 +107,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_RDS (1 << TIF_RDS) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -141,7 +141,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_RDS) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a176c81a74b2..acb2fcc510d2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,12 +555,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } /* * Try to cache the base value so further operations can - * avoid RMW. If that faults, do not enable RDS. + * avoid RMW. If that faults, do not enable SSBD. */ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { - setup_force_cpu_cap(X86_FEATURE_RDS); - setup_force_cpu_cap(X86_FEATURE_AMD_RDS); - x86_amd_ls_cfg_rds_mask = 1ULL << bit; + setup_force_cpu_cap(X86_FEATURE_SSBD); + setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } } @@ -849,9 +849,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - if (boot_cpu_has(X86_FEATURE_AMD_RDS)) { - set_cpu_cap(c, X86_FEATURE_RDS); - set_cpu_cap(c, X86_FEATURE_AMD_RDS); + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_AMD_SSBD); } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9a3bb658d6dc..ae6f9bae17ce 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -44,10 +44,10 @@ static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; /* * AMD specific MSR info for Speculative Store Bypass control. - * x86_amd_ls_cfg_rds_mask is initialized in identify_boot_cpu(). + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). */ u64 __ro_after_init x86_amd_ls_cfg_base; -u64 __ro_after_init x86_amd_ls_cfg_rds_mask; +u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; void __init check_bugs(void) { @@ -145,7 +145,7 @@ u64 x86_spec_ctrl_get_default(void) u64 msrval = x86_spec_ctrl_base; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - msrval |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); return msrval; } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); @@ -158,7 +158,7 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) return; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); @@ -173,18 +173,18 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) return; if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - host |= rds_tif_to_spec_ctrl(current_thread_info()->flags); + host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, host); } EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); -static void x86_amd_rds_enable(void) +static void x86_amd_ssb_disable(void) { - u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_rds_mask; + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_AMD_RDS)) + if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } @@ -472,7 +472,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; enum ssb_mitigation_cmd cmd; - if (!boot_cpu_has(X86_FEATURE_RDS)) + if (!boot_cpu_has(X86_FEATURE_SSBD)) return mode; cmd = ssb_parse_cmdline(); @@ -506,7 +506,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. - * - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation */ if (mode == SPEC_STORE_BYPASS_DISABLE) { @@ -517,12 +517,12 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: - x86_spec_ctrl_base |= SPEC_CTRL_RDS; - x86_spec_ctrl_mask &= ~SPEC_CTRL_RDS; - x86_spec_ctrl_set(SPEC_CTRL_RDS); + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; + x86_spec_ctrl_set(SPEC_CTRL_SSBD); break; case X86_VENDOR_AMD: - x86_amd_rds_enable(); + x86_amd_ssb_disable(); break; } } @@ -555,16 +555,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_RDS); + update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_RDS); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); break; default: return -ERANGE; @@ -634,7 +634,7 @@ void x86_spec_ctrl_setup_ap(void) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) - x86_amd_rds_enable(); + x86_amd_ssb_disable(); } #ifdef CONFIG_SYSFS diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index beb1da89ea7d..d0dd73605cf5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -911,7 +911,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_RDS_NO)) + !(ia32_cap & ARCH_CAP_SSBD_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f15aea653e8a..047adaa28f3b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -154,7 +154,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); - setup_clear_cpu_cap(X86_FEATURE_RDS); + setup_clear_cpu_cap(X86_FEATURE_SSBD); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9c48e18d4aeb..c3442300b8f7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -207,11 +207,11 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn { u64 msr; - if (static_cpu_has(X86_FEATURE_AMD_RDS)) { - msr = x86_amd_ls_cfg_base | rds_tif_to_amd_ls_cfg(tifn); + if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); } else { - msr = x86_spec_ctrl_base | rds_tif_to_spec_ctrl(tifn); + msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } } @@ -250,7 +250,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOTSC) cr4_toggle_bits(X86_CR4_TSD); - if ((tifp ^ tifn) & _TIF_RDS) + if ((tifp ^ tifn) & _TIF_SSBD) __speculative_store_bypass_update(tifn); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a9409f06ddc9..18e6db5010e0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(SPEC_CTRL) | F(RDS) | F(ARCH_CAPABILITIES); + F(SPEC_CTRL) | F(SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 24187d07c2cf..aedeb6cc2871 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -179,7 +179,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) if (best && (best->ebx & bit(X86_FEATURE_IBRS))) return true; best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_RDS))); + return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); } static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0eb3863d3159..874b6615ddf5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3141,7 +3141,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_RDS)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; From f8cd89f5e05d49422315e60ec2db9fcb66d25aca Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 9 May 2018 21:41:38 +0200 Subject: [PATCH 65/88] proc: Use underscores for SSBD in 'status' commit e96f46ee8587607a828f783daa6eb5b44d25004d upstream The style for the 'status' file is CamelCase or this. _. Fixes: fae1fa0fc ("proc: Provide details on speculation flaw mitigations") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 3e37195485fc..94f83e74db24 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -347,7 +347,7 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); #endif - seq_printf(m, "\nSpeculation Store Bypass:\t"); + seq_printf(m, "\nSpeculation_Store_Bypass:\t"); switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { case -EINVAL: seq_printf(m, "unknown"); From f79f0efe8e1816063f83926c946026d83b9b287f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 8 May 2018 15:43:45 +0200 Subject: [PATCH 66/88] Documentation/spec_ctrl: Do some minor cleanups commit dd0792699c4058e63c0715d9a7c2d40226fcdddc upstream Fix some typos, improve formulations, end sentences with a fullstop. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt index 1b3690d30943..32f3d55c54b7 100644 --- a/Documentation/spec_ctrl.txt +++ b/Documentation/spec_ctrl.txt @@ -2,13 +2,13 @@ Speculation Control =================== -Quite some CPUs have speculation related misfeatures which are in fact -vulnerabilites causing data leaks in various forms even accross privilege -domains. +Quite some CPUs have speculation-related misfeatures which are in +fact vulnerabilities causing data leaks in various forms even across +privilege domains. The kernel provides mitigation for such vulnerabilities in various -forms. Some of these mitigations are compile time configurable and some on -the kernel command line. +forms. Some of these mitigations are compile-time configurable and some +can be supplied on the kernel command line. There is also a class of mitigations which are very expensive, but they can be restricted to a certain set of processes or tasks in controlled @@ -32,18 +32,18 @@ the following meaning: Bit Define Description ==== ===================== =================================================== 0 PR_SPEC_PRCTL Mitigation can be controlled per task by - PR_SET_SPECULATION_CTRL + PR_SET_SPECULATION_CTRL. 1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is - disabled + disabled. 2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is - enabled + enabled. 3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A subsequent prctl(..., PR_SPEC_ENABLE) will fail. ==== ===================== =================================================== If all bits are 0 the CPU is not affected by the speculation misfeature. -If PR_SPEC_PRCTL is set, then the per task control of the mitigation is +If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation misfeature will fail. @@ -61,9 +61,9 @@ Common error codes Value Meaning ======= ================================================================= EINVAL The prctl is not implemented by the architecture or unused - prctl(2) arguments are not 0 + prctl(2) arguments are not 0. -ENODEV arg2 is selecting a not supported speculation misfeature +ENODEV arg2 is selecting a not supported speculation misfeature. ======= ================================================================= PR_SET_SPECULATION_CTRL error codes @@ -74,7 +74,7 @@ Value Meaning 0 Success ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor - PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE + PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE. ENXIO Control of the selected speculation misfeature is not possible. See PR_GET_SPECULATION_CTRL. From eb7b5624be3e6249a880310be486245db15a5f5c Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 10 May 2018 22:47:18 +0200 Subject: [PATCH 67/88] x86/bugs: Fix __ssb_select_mitigation() return type commit d66d8ff3d21667b41eddbe86b35ab411e40d8c5f upstream __ssb_select_mitigation() returns one of the members of enum ssb_mitigation, not ssb_mitigation_cmd; fix the prototype to reflect that. Fixes: 24f7fc83b9204 ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ae6f9bae17ce..c7b4d117f652 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -467,7 +467,7 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) return cmd; } -static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void) +static enum ssb_mitigation __init __ssb_select_mitigation(void) { enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; enum ssb_mitigation_cmd cmd; From dbb264a253c8b07259d55fb3373b783fcb641b04 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 10 May 2018 22:47:32 +0200 Subject: [PATCH 68/88] x86/bugs: Make cpu_show_common() static commit 7bb4d366cba992904bffa4820d24e70a3de93e76 upstream cpu_show_common() is not used outside of arch/x86/kernel/cpu/bugs.c, so make it static. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c7b4d117f652..8187642236b6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -639,7 +639,7 @@ void x86_spec_ctrl_setup_ap(void) #ifdef CONFIG_SYSFS -ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, +static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) From 6fdd277a9326c5ef3fe94999c9c319ad64333fdd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 11 May 2018 16:50:35 -0400 Subject: [PATCH 69/88] x86/bugs: Fix the parameters alignment and missing void commit ffed645e3be0e32f8e9ab068d257aee8d0fe8eec upstream Fixes: 7bb4d366c ("x86/bugs: Make cpu_show_common() static") Fixes: 24f7fc83b ("x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8187642236b6..4f8c88ec8bed 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -530,7 +530,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) return mode; } -static void ssb_select_mitigation() +static void ssb_select_mitigation(void) { ssb_mode = __ssb_select_mitigation(); @@ -640,7 +640,7 @@ void x86_spec_ctrl_setup_ap(void) #ifdef CONFIG_SYSFS static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, - char *buf, unsigned int bug) + char *buf, unsigned int bug) { if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); From 3a684641619ff0e06b8d4cb8c2ffbef304c9bdb1 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Sun, 13 May 2018 17:33:57 -0400 Subject: [PATCH 70/88] x86/cpu: Make alternative_msr_write work for 32-bit code commit 5f2b745f5e1304f438f9b2cd03ebc8120b6e0d3b upstream Cast val and (val >> 32) to (u32), so that they fit in a general-purpose register in both 32-bit and 64-bit code. [ tglx: Made it u32 instead of uintptr_t ] Fixes: c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") Signed-off-by: Jim Mattson Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 328ea3cb769f..bc258e644e5e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -265,8 +265,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) : : "c" (msr), - "a" (val), - "d" (val >> 32), + "a" ((u32)val), + "d" ((u32)(val >> 32)), [feature] "i" (feature) : "memory"); } From 69e9b0b1e04001a743927489bb8b9a10344810d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 May 2018 15:21:01 +0200 Subject: [PATCH 71/88] KVM: SVM: Move spec control call after restore of GS commit 15e6c22fd8e5a42c5ed6d487b7c9fe44c2517765 upstream svm_vcpu_run() invokes x86_spec_ctrl_restore_host() after VMEXIT, but before the host GS is restored. x86_spec_ctrl_restore_host() uses 'current' to determine the host SSBD state of the thread. 'current' is GS based, but host GS is not yet restored and the access causes a triple fault. Move the call after the host GS restore. Fixes: 885f82bfbc6f x86/process: Allow runtime control of Speculative Store Bypass Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Paolo Bonzini Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 516ddfff8c8b..d1a4321d4631 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5011,6 +5011,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5031,18 +5043,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_restore_host(svm->spec_ctrl); - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif - reload_tss(vcpu); local_irq_disable(); From 4a58908fa1476c600548f82effc75bcfa890454a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 May 2018 18:15:14 +0200 Subject: [PATCH 72/88] x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e7c587da125291db39ddf1f49b18e5970adbac17 upstream Intel and AMD have different CPUID bits hence for those use synthetic bits which get set on the respective vendor's in init_speculation_control(). So that debacles like what the commit message of c65732e4f721 ("x86/cpu: Restore CPUID_8000_0008_EBX reload") talks about don't happen anymore. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Jörg Otte Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Link: https://lkml.kernel.org/r/20180504161815.GG9257@pd.tnic Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 12 ++++++++---- arch/x86/kernel/cpu/common.c | 14 ++++++++++---- arch/x86/kvm/cpuid.c | 10 +++++----- arch/x86/kvm/cpuid.h | 4 ++-- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0ed8ea5419c1..059437ae9fac 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -205,7 +205,10 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_SSBD (7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -263,9 +266,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -301,6 +304,7 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0dd73605cf5..67bfa3cad8a2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -725,17 +725,23 @@ static void init_speculation_control(struct cpuinfo_x86 *c) * and they also have a different bit for STIBP support. Also, * a hypervisor might have set the individual AMD bits even on * Intel CPUs, for finer-grained selection of what's available. - * - * We use the AMD bits in 0x8000_0008 EBX as the generic hardware - * features, which are visible in /proc/cpuinfo and used by the - * kernel. So set those accordingly from the Intel bits. */ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) + set_cpu_cap(c, X86_FEATURE_IBRS); + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 18e6db5010e0..910c2dbb38de 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -619,10 +619,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index aedeb6cc2871..eb47c37db15f 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -165,7 +165,7 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best && (best->ebx & bit(X86_FEATURE_IBPB))) + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBPB))) return true; best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); @@ -176,7 +176,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best && (best->ebx & bit(X86_FEATURE_IBRS))) + if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) return true; best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD))); From a7c343228e5c32802431e6cc5b855ae61eb4db72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 19:13:18 +0200 Subject: [PATCH 73/88] x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration from IBRS commit 7eb8956a7fec3c1f0abc2a5517dada99ccc8a961 upstream The availability of the SPEC_CTRL MSR is enumerated by a CPUID bit on Intel and implied by IBRS or STIBP support on AMD. That's just confusing and in case an AMD CPU has IBRS not supported because the underlying problem has been fixed but has another bit valid in the SPEC_CTRL MSR, the thing falls apart. Add a synthetic feature bit X86_FEATURE_MSR_SPEC_CTRL to denote the availability on both Intel and AMD. While at it replace the boot_cpu_has() checks with static_cpu_has() where possible. This prevents late microcode loading from exposing SPEC_CTRL, but late loading is already very limited as it does not reevaluate the mitigation options and other bits and pieces. Having static_cpu_has() is the simplest and least fragile solution. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 18 +++++++++++------- arch/x86/kernel/cpu/common.c | 9 +++++++-- arch/x86/kernel/cpu/intel.c | 1 + 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 059437ae9fac..ca0f33f5260a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,6 +197,8 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ + #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4f8c88ec8bed..59649310f9f3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,7 +63,7 @@ void __init check_bugs(void) * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD * init code as it is not enumerated and depends on the family. */ - if (boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); /* Select the proper spectre mitigation before patching alternatives */ @@ -144,7 +144,7 @@ u64 x86_spec_ctrl_get_default(void) { u64 msrval = x86_spec_ctrl_base; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); return msrval; } @@ -154,10 +154,12 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) { u64 host = x86_spec_ctrl_base; - if (!boot_cpu_has(X86_FEATURE_IBRS)) + /* Is MSR_SPEC_CTRL implemented ? */ + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + /* Intel controls SSB in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -169,10 +171,12 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) { u64 host = x86_spec_ctrl_base; - if (!boot_cpu_has(X86_FEATURE_IBRS)) + /* Is MSR_SPEC_CTRL implemented ? */ + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + /* Intel controls SSB in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -630,7 +634,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { - if (boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67bfa3cad8a2..043622827331 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -729,19 +729,24 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_AMD_IBRS)) + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } if (cpu_has(c, X86_FEATURE_AMD_IBPB)) set_cpu_cap(c, X86_FEATURE_IBPB); - if (cpu_has(c, X86_FEATURE_AMD_STIBP)) + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 047adaa28f3b..7f495e846da3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -153,6 +153,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); } From f69e91f2c4ce59deb66bd30150e5153c08873ae9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 20:21:36 +0200 Subject: [PATCH 74/88] x86/cpufeatures: Disentangle SSBD enumeration commit 52817587e706686fcdb27f14c1b000c92f266c96 upstream The SSBD enumeration is similarly to the other bits magically shared between Intel and AMD though the mechanisms are different. Make X86_FEATURE_SSBD synthetic and set it depending on the vendor specific features or family dependent setup. Change the Intel bit to X86_FEATURE_SPEC_CTRL_SSBD to denote that SSBD is controlled via MSR_SPEC_CTRL and fix up the usage sites. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 5 +++-- arch/x86/kernel/cpu/amd.c | 7 +------ arch/x86/kernel/cpu/bugs.c | 10 +++++----- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/process.c | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ca0f33f5260a..d071767ed80d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,6 +198,7 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ @@ -207,7 +208,7 @@ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_AMD_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ @@ -314,7 +315,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index acb2fcc510d2..179d5721a509 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -558,8 +558,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) * avoid RMW. If that faults, do not enable SSBD. */ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); setup_force_cpu_cap(X86_FEATURE_SSBD); - setup_force_cpu_cap(X86_FEATURE_AMD_SSBD); x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } @@ -848,11 +848,6 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - - if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) { - set_cpu_cap(c, X86_FEATURE_SSBD); - set_cpu_cap(c, X86_FEATURE_AMD_SSBD); - } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 59649310f9f3..15a6c58c4226 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -158,8 +158,8 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - /* Intel controls SSB in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -175,8 +175,8 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) return; - /* Intel controls SSB in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); if (host != guest_spec_ctrl) @@ -188,7 +188,7 @@ static void x86_amd_ssb_disable(void) { u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 043622827331..945e84105f8d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -735,6 +735,9 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7f495e846da3..93781e3f05b2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -156,6 +156,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c3442300b8f7..b3cd08eb664e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -207,7 +207,7 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn { u64 msr; - if (static_cpu_has(X86_FEATURE_AMD_SSBD)) { + if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); wrmsrl(MSR_AMD64_LS_CFG, msr); } else { From 5a63725cd18fcee2af6ec46ccb856b64ad3077b4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Sep 2017 19:08:21 +0200 Subject: [PATCH 75/88] x86/cpu/AMD: Fix erratum 1076 (CPB bit) commit f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 upstream CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sherry Hurwitz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 179d5721a509..21367b51c2f3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -749,6 +749,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c) } } +static void init_amd_zn(struct cpuinfo_x86 *c) +{ + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects + * all up to and including B1. + */ + if (c->x86_model <= 1 && c->x86_stepping <= 1) + set_cpu_cap(c, X86_FEATURE_CPB); +} + static void init_amd(struct cpuinfo_x86 *c) { u32 dummy; @@ -779,6 +789,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x17: init_amd_zn(c); break; } /* Enable workaround for FXSAVE leak */ From 53c434e735fffbf8715a1778ce44387131e0b080 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 16:26:00 +0200 Subject: [PATCH 76/88] x86/cpufeatures: Add FEATURE_ZEN commit d1035d971829dcf80e8686ccde26f94b0a069472 upstream Add a ZEN feature bit so family-dependent static_cpu_has() optimizations can be built for ZEN. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/amd.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d071767ed80d..ec87b8caa9b9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -212,6 +212,8 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 21367b51c2f3..4c2be99fa0fb 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -751,6 +751,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + set_cpu_cap(c, X86_FEATURE_ZEN); /* * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. From d0cb78f5e4214db86b12a9448d8ccaa005f43cb9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 May 2018 21:53:09 +0200 Subject: [PATCH 77/88] x86/speculation: Handle HT correctly on AMD commit 1f50ddb4f4189243c05926b842dc1a0332195f31 upstream The AMD64_LS_CFG MSR is a per core MSR on Family 17H CPUs. That means when hyperthreading is enabled the SSBD bit toggle needs to take both cores into account. Otherwise the following situation can happen: CPU0 CPU1 disable SSB disable SSB enable SSB <- Enables it for the Core, i.e. for CPU0 as well So after the SSB enable on CPU1 the task on CPU0 runs with SSB enabled again. On Intel the SSBD control is per core as well, but the synchronization logic is implemented behind the per thread SPEC_CTRL MSR. It works like this: CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL i.e. if one of the threads enables a mitigation then this affects both and the mitigation is only disabled in the core when both threads disabled it. Add the necessary synchronization logic for AMD family 17H. Unfortunately that requires a spinlock to serialize the access to the MSR, but the locks are only shared between siblings. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 ++ arch/x86/kernel/process.c | 131 ++++++++++++++++++++++++++++--- arch/x86/kernel/smpboot.c | 5 ++ 3 files changed, 133 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index dc21209790bf..0cb49c4564b0 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -33,6 +33,12 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + extern void speculative_store_bypass_update(void); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b3cd08eb664e..1e9d155544ee 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -203,22 +203,135 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +#ifdef CONFIG_SMP + +struct ssb_state { + struct ssb_state *shared_state; + raw_spinlock_t lock; + unsigned int disable_state; + unsigned long local_state; +}; + +#define LSTATE_SSB 0 + +static DEFINE_PER_CPU(struct ssb_state, ssb_state); + +void speculative_store_bypass_ht_init(void) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + st->local_state = 0; + + /* + * Shared state setup happens once on the first bringup + * of the CPU. It's not destroyed on CPU hotunplug. + */ + if (st->shared_state) + return; + + raw_spin_lock_init(&st->lock); + + /* + * Go over HT siblings and check whether one of them has set up the + * shared state pointer already. + */ + for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (!per_cpu(ssb_state, cpu).shared_state) + continue; + + /* Link it to the state of the sibling: */ + st->shared_state = per_cpu(ssb_state, cpu).shared_state; + return; + } + + /* + * First HT sibling to come up on the core. Link shared state of + * the first HT sibling to itself. The siblings on the same core + * which come up later will see the shared state pointer and link + * themself to the state of this CPU. + */ + st->shared_state = st; +} + +/* + * Logic is: First HT sibling enables SSBD for both siblings in the core + * and last sibling to disable it, disables it for the whole core. This how + * MSR_SPEC_CTRL works in "hardware": + * + * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + */ +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + u64 msr = x86_amd_ls_cfg_base; + + if (!static_cpu_has(X86_FEATURE_ZEN)) { + msr |= ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + return; + } + + if (tifn & _TIF_SSBD) { + /* + * Since this can race with prctl(), block reentry on the + * same CPU. + */ + if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) + return; + + msr |= x86_amd_ls_cfg_ssbd_mask; + + raw_spin_lock(&st->shared_state->lock); + /* First sibling enables SSBD: */ + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + st->shared_state->disable_state++; + raw_spin_unlock(&st->shared_state->lock); + } else { + if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) + return; + + raw_spin_lock(&st->shared_state->lock); + st->shared_state->disable_state--; + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + raw_spin_unlock(&st->shared_state->lock); + } +} +#else +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + + wrmsrl(MSR_AMD64_LS_CFG, msr); +} +#endif + +static __always_inline void intel_set_ssb_state(unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + + wrmsrl(MSR_IA32_SPEC_CTRL, msr); +} + static __always_inline void __speculative_store_bypass_update(unsigned long tifn) { - u64 msr; - - if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { - msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); - wrmsrl(MSR_AMD64_LS_CFG, msr); - } else { - msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - wrmsrl(MSR_IA32_SPEC_CTRL, msr); - } + if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); } void speculative_store_bypass_update(void) { + preempt_disable(); __speculative_store_bypass_update(current_thread_info()->flags); + preempt_enable(); } void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 83929cc47a4b..cb945146b7c8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -75,6 +75,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -229,6 +230,8 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + speculative_store_bypass_ht_init(); + /* * Lock vector_lock and initialize the vectors on this cpu * before setting the cpu online. We must set it online with @@ -1325,6 +1328,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_mtrr_aps_delayed_init(); smp_quirk_init_udelay(); + + speculative_store_bypass_ht_init(); } void arch_enable_nonboot_cpus_begin(void) From 1189cbf52ad35cfd04a715016200ea81dd4c708f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 May 2018 23:01:01 +0200 Subject: [PATCH 78/88] x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL commit ccbcd2674472a978b48c91c1fbfb66c0ff959f24 upstream AMD is proposing a VIRT_SPEC_CTRL MSR to handle the Speculative Store Bypass Disable via MSR_AMD64_LS_CFG so that guests do not have to care about the bit position of the SSBD bit and thus facilitate migration. Also, the sibling coordination on Family 17H CPUs can only be done on the host. Extend x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() with an extra argument for the VIRT_SPEC_CTRL MSR. Hand in 0 from VMX and in SVM add a new virt_spec_ctrl member to the CPU data structure which is going to be used in later patches for the actual implementation. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 9 ++++++--- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- arch/x86/kvm/svm.c | 11 +++++++++-- arch/x86/kvm/vmx.c | 5 +++-- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 0cb49c4564b0..6e2874049afd 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -10,10 +10,13 @@ * the guest has, while on VMEXIT we restore the host view. This * would be easier if SPEC_CTRL were architecturally maskable or * shadowable for guests but this is not (currently) the case. - * Takes the guest view of SPEC_CTRL MSR as a parameter. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_spec_ctrl_set_guest(u64); -extern void x86_spec_ctrl_restore_host(u64); +extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, + u64 guest_virt_spec_ctrl); +extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, + u64 guest_virt_spec_ctrl); /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 15a6c58c4226..d00e246d4661 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -150,7 +150,15 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) { u64 host = x86_spec_ctrl_base; @@ -167,7 +175,15 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl) +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) { u64 host = x86_spec_ctrl_base; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1a4321d4631..57c96f165ac4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -185,6 +185,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -1561,6 +1567,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 eax = 1; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -4917,7 +4924,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - x86_spec_ctrl_set_guest(svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5041,7 +5048,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(svm->spec_ctrl); + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 874b6615ddf5..f1d158a268a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8916,9 +8916,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -9054,7 +9055,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(vmx->spec_ctrl); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); From 7c0b2dc44956533c5aac95f07575feef7b63344c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 17 May 2018 17:09:18 +0200 Subject: [PATCH 79/88] x86/speculation: Add virtualized speculative store bypass disable support commit 11fb0683493b2da112cd64c9dada221b52463bf7 upstream Some AMD processors only support a non-architectural means of enabling speculative store bypass disable (SSBD). To allow a simplified view of this to a guest, an architectural definition has been created through a new CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f. With this, a hypervisor can virtualize the existence of this definition and provide an architectural method for using SSBD to a guest. Add the new CPUID feature, the new MSR and update the existing SSBD support to use this MSR when present. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 4 +++- arch/x86/kernel/process.c | 13 ++++++++++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ec87b8caa9b9..c278f276c9b3 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -274,6 +274,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b67d57eb4d0f..9fd9dcfe179e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -323,6 +323,8 @@ #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d00e246d4661..97987b53acbd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -204,7 +204,9 @@ static void x86_amd_ssb_disable(void) { u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); + else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) wrmsrl(MSR_AMD64_LS_CFG, msrval); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1e9d155544ee..6d9e1ee0f5b2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -312,6 +312,15 @@ static __always_inline void amd_set_core_ssb_state(unsigned long tifn) } #endif +static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) +{ + /* + * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, + * so ssbd_tif_to_spec_ctrl() just works. + */ + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); +} + static __always_inline void intel_set_ssb_state(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); @@ -321,7 +330,9 @@ static __always_inline void intel_set_ssb_state(unsigned long tifn) static __always_inline void __speculative_store_bypass_update(unsigned long tifn) { - if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) + amd_set_ssb_virt_state(tifn); + else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else intel_set_ssb_state(tifn); From b7b84401576d3858e9573d69d8287e182444f8e9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 20:31:44 +0200 Subject: [PATCH 80/88] x86/speculation: Rework speculative_store_bypass_update() commit 0270be3e34efb05a88bc4c422572ece038ef3608 upstream The upcoming support for the virtual SPEC_CTRL MSR on AMD needs to reuse speculative_store_bypass_update() to avoid code duplication. Add an argument for supplying a thread info (TIF) value and create a wrapper speculative_store_bypass_update_current() which is used at the existing call site. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 7 ++++++- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/process.c | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 6e2874049afd..82b6c5a0d61e 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -42,6 +42,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(void); +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 97987b53acbd..eddbdc843b3f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -597,7 +597,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update(); + speculative_store_bypass_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6d9e1ee0f5b2..00a9047539d7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -338,10 +338,10 @@ static __always_inline void __speculative_store_bypass_update(unsigned long tifn intel_set_ssb_state(tifn); } -void speculative_store_bypass_update(void) +void speculative_store_bypass_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(current_thread_info()->flags); + __speculative_store_bypass_update(tif); preempt_enable(); } From ea99935b633bd4766a679e51b173197c750fb00b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 12 May 2018 00:14:51 +0200 Subject: [PATCH 81/88] x86/bugs: Unify x86_spec_ctrl_{set_guest,restore_host} commit cc69b34989210f067b2c51d5539b5f96ebcc3a01 upstream Function bodies are very similar and are going to grow more almost identical code. Add a bool arg to determine whether SPEC_CTRL is being set for the guest or restored to the host. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 33 +++++++++++++++--- arch/x86/kernel/cpu/bugs.c | 58 ++++++++------------------------ 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 82b6c5a0d61e..9cecbe5e57ee 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -13,10 +13,35 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, - u64 guest_virt_spec_ctrl); -extern void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, - u64 guest_virt_spec_ctrl); +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index eddbdc843b3f..92031508be01 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -150,55 +150,25 @@ u64 x86_spec_ctrl_get_default(void) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); -/** - * x86_spec_ctrl_set_guest - Set speculation control registers for the guest - * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL - * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL - * (may get translated to MSR_AMD64_LS_CFG bits) - * - * Avoids writing to the MSR if the content/bits are the same - */ -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void +x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 host = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); + u64 msr, host = x86_spec_ctrl_base; /* Is MSR_SPEC_CTRL implemented ? */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + host |= ssbd_tif_to_spec_ctrl(ti->flags); - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl); + if (host != guest_spec_ctrl) { + msr = setguest ? guest_spec_ctrl : host; + wrmsrl(MSR_IA32_SPEC_CTRL, msr); + } + } } -EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest); - -/** - * x86_spec_ctrl_restore_host - Restore host speculation control registers - * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL - * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL - * (may get translated to MSR_AMD64_LS_CFG bits) - * - * Avoids writing to the MSR if the content/bits are the same - */ -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) -{ - u64 host = x86_spec_ctrl_base; - - /* Is MSR_SPEC_CTRL implemented ? */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - return; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - - if (host != guest_spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, host); -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host); +EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); static void x86_amd_ssb_disable(void) { From 599288ec9e20d9772e6e8a27aeae021f018c7336 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 12 May 2018 20:49:16 +0200 Subject: [PATCH 82/88] x86/bugs: Expose x86_spec_ctrl_base directly commit fa8ac4988249c38476f6ad678a4848a736373403 upstream x86_spec_ctrl_base is the system wide default value for the SPEC_CTRL MSR. x86_spec_ctrl_get_default() returns x86_spec_ctrl_base and was intended to prevent modification to that variable. Though the variable is read only after init and globaly visible already. Remove the function and export the variable instead. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 16 +++++----------- arch/x86/include/asm/spec-ctrl.h | 3 --- arch/x86/kernel/cpu/bugs.c | 11 +---------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index bc258e644e5e..8d9deec00de9 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,16 +217,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; -/* - * The Intel specification for the SPEC_CTRL MSR requires that we - * preserve any already set reserved bits at boot time (e.g. for - * future additions that this kernel is not currently aware of). - * We then set any additional mitigation bits that we want - * ourselves and always use this as the base for SPEC_CTRL. - * We also use this when handling guest entry/exit as below. - */ extern void x86_spec_ctrl_set(u64); -extern u64 x86_spec_ctrl_get_default(void); /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { @@ -278,6 +269,9 @@ static inline void indirect_branch_prediction_barrier(void) alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. @@ -286,7 +280,7 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ \ preempt_disable(); \ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ @@ -295,7 +289,7 @@ do { \ #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_get_default(); \ + u64 val = x86_spec_ctrl_base; \ \ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 9cecbe5e57ee..763d49710329 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -47,9 +47,6 @@ void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_ssbd_mask; -/* The Intel SPEC CTRL MSR base value cache */ -extern u64 x86_spec_ctrl_base; - static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 92031508be01..47b7f4f6b5ac 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -35,6 +35,7 @@ static void __init ssb_select_mitigation(void); * writes to SPEC_CTRL contain whatever reserved bits have been set. */ u64 __ro_after_init x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); /* * The vendor and possibly platform specific bits which can be modified in @@ -140,16 +141,6 @@ void x86_spec_ctrl_set(u64 val) } EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); -u64 x86_spec_ctrl_get_default(void) -{ - u64 msrval = x86_spec_ctrl_base; - - if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) - msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags); - return msrval; -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default); - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { From ec90464d96c50f90bfe1bde6dea748a6c962313c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 12 May 2018 20:53:14 +0200 Subject: [PATCH 83/88] x86/bugs: Remove x86_spec_ctrl_set() commit 4b59bdb569453a60b752b274ca61f009e37f4dae upstream x86_spec_ctrl_set() is only used in bugs.c and the extra mask checks there provide no real value as both call sites can just write x86_spec_ctrl_base to MSR_SPEC_CTRL. x86_spec_ctrl_base is valid and does not need any extra masking or checking. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 -- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8d9deec00de9..8b38df98548e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,8 +217,6 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; -extern void x86_spec_ctrl_set(u64); - /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 47b7f4f6b5ac..82a99d00563e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -132,15 +132,6 @@ static const char *spectre_v2_strings[] = { static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -void x86_spec_ctrl_set(u64 val) -{ - if (val & x86_spec_ctrl_mask) - WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val); - else - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val); -} -EXPORT_SYMBOL_GPL(x86_spec_ctrl_set); - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -502,7 +493,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; - x86_spec_ctrl_set(SPEC_CTRL_SSBD); + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; case X86_VENDOR_AMD: x86_amd_ssb_disable(); @@ -614,7 +605,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask); + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); From 0ec827f974e198c609c2f258a5a1f11f9af48bb2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 12 May 2018 20:10:00 +0200 Subject: [PATCH 84/88] x86/bugs: Rework spec_ctrl base and mask logic commit be6fcb5478e95bb1c91f489121238deb3abca46a upstream x86_spec_ctrL_mask is intended to mask out bits from a MSR_SPEC_CTRL value which are not to be modified. However the implementation is not really used and the bitmask was inverted to make a check easier, which was removed in "x86/bugs: Remove x86_spec_ctrl_set()" Aside of that it is missing the STIBP bit if it is supported by the platform, so if the mask would be used in x86_virt_spec_ctrl() then it would prevent a guest from setting STIBP. Add the STIBP bit if supported and use the mask in x86_virt_spec_ctrl() to sanitize the value which is supplied by the guest. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 82a99d00563e..2ae358668ab8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. */ -static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS; +static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; /* * AMD specific MSR info for Speculative Store Bypass control. @@ -67,6 +67,10 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* Allow STIBP in MSR_SPEC_CTRL if supported */ + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); @@ -135,18 +139,26 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { + u64 msrval, guestval, hostval = x86_spec_ctrl_base; struct thread_info *ti = current_thread_info(); - u64 msr, host = x86_spec_ctrl_base; /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* + * Restrict guest_spec_ctrl to supported values. Clear the + * modifiable bits in the host base value and or the + * modifiable bits from the guest value. + */ + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + /* SSBD controlled in MSR_SPEC_CTRL */ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) - host |= ssbd_tif_to_spec_ctrl(ti->flags); + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - if (host != guest_spec_ctrl) { - msr = setguest ? guest_spec_ctrl : host; - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); } } } @@ -492,7 +504,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; case X86_VENDOR_AMD: From b0ef8c72b3d70505ba7fd72af6b1e3fc9b3ae9bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 May 2018 20:42:48 +0200 Subject: [PATCH 85/88] x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG commit 47c61b3955cf712cadfc25635bf9bc174af030ea upstream Add the necessary logic for supporting the emulated VIRT_SPEC_CTRL MSR to x86_virt_spec_ctrl(). If either X86_FEATURE_LS_CFG_SSBD or X86_FEATURE_VIRT_SPEC_CTRL is set then use the new guest_virt_spec_ctrl argument to check whether the state must be modified on the host. The update reuses speculative_store_bypass_update() so the ZEN-specific sibling coordination can be reused. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 763d49710329..ae7c2c5cd7f0 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,6 +53,12 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2ae358668ab8..86af9b1b049d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -161,6 +161,36 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) wrmsrl(MSR_IA32_SPEC_CTRL, msrval); } } + + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. + */ + if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !static_cpu_has(X86_FEATURE_VIRT_SSBD)) + return; + + /* + * If the host has SSBD mitigation enabled, force it in the host's + * virtual MSR value. If its not permanently enabled, evaluate + * current's TIF_SSBD thread flag. + */ + if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) + hostval = SPEC_CTRL_SSBD; + else + hostval = ssbd_tif_to_spec_ctrl(ti->flags); + + /* Sanitize the guest value */ + guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; + + if (hostval != guestval) { + unsigned long tif; + + tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + + speculative_store_bypass_update(tif); + } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); From b965592a07a248ef254d9d421bd34a6b548db21f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 10 May 2018 22:06:39 +0200 Subject: [PATCH 86/88] KVM: SVM: Implement VIRT_SPEC_CTRL support for SSBD commit bc226f07dcd3c9ef0b7f6236fe356ea4a9cb4769 upstream Expose the new virtualized architectural mechanism, VIRT_SSBD, for using speculative store bypass disable (SSBD) under SVM. This will allow guests to use SSBD on hardware that uses non-architectural mechanisms for enabling SSBD. [ tglx: Folded the migration fixup from Paolo Bonzini ] Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kvm/cpuid.c | 11 +++++++++-- arch/x86/kvm/cpuid.h | 9 +++++++++ arch/x86/kvm/svm.c | 21 +++++++++++++++++++-- arch/x86/kvm/vmx.c | 18 +++++++++++++++--- arch/x86/kvm/x86.c | 13 ++++--------- 7 files changed, 59 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 20cfeeb681c6..7598a6c26f76 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -864,7 +864,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 945e84105f8d..40fc748f60f6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -735,7 +735,8 @@ static void init_speculation_control(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); - if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) set_cpu_cap(c, X86_FEATURE_SSBD); if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 910c2dbb38de..a69f18d4676c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -618,13 +618,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) entry->ebx |= F(AMD_IBPB); if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index eb47c37db15f..c38369781239 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -190,6 +190,15 @@ static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); } +static inline bool guest_cpuid_has_virt_ssbd(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + return best && (best->ebx & bit(X86_FEATURE_VIRT_SSBD)); +} + + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 57c96f165ac4..a27f9e442ffc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3557,6 +3557,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has_virt_ssbd(vcpu)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3691,6 +3698,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has_virt_ssbd(vcpu)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5150,7 +5167,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -5467,7 +5484,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f1d158a268a2..d92523afb425 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8691,9 +8691,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } } -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -11346,7 +11358,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3aaaf305420d..a0cb85f30c94 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1002,6 +1002,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MCG_CTL, MSR_IA32_MCG_EXT_CTL, MSR_IA32_SMBASE, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2664,7 +2665,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; @@ -4226,14 +4227,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; From 3394ef1a7efc08e3c185ac2446f06284847ccb37 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 May 2018 23:18:09 -0400 Subject: [PATCH 87/88] x86/bugs: Rename SSBD_NO to SSB_NO commit 240da953fcc6a9008c92fae5b1f727ee5ed167ab upstream The "336996 Speculative Execution Side Channel Mitigations" from May defines this as SSB_NO, hence lets sync-up. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9fd9dcfe179e..1ec13e253174 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -63,7 +63,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SSBD_NO (1 << 4) /* +#define ARCH_CAP_SSB_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass * attack, so no Speculative Store Bypass * control required. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 40fc748f60f6..b0fd028b2eee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -926,7 +926,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSBD_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) From 2272cdd5d5bf42e3721430ae6076656a42043c34 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 May 2018 16:58:04 +0200 Subject: [PATCH 88/88] Linux 4.9.102 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7d7bda23db8f..d84c39c290f7 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 101 +SUBLEVEL = 102 EXTRAVERSION = NAME = Roaring Lionus