From 3eaa5de1101627899c0cfb986588a969969fdabc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 20 Mar 2018 19:29:51 +1100 Subject: [PATCH 01/68] MIPS: ralink: Remove ralink_halt() commit 891731f6a5dbe508d12443175a7e166a2fba616a upstream. ralink_halt() does nothing that machine_halt() doesn't already do, so it adds no value. It actually causes incorrect behaviour due to the "unreachable()" at the end. This tells the compiler that the end of the function will never be reached, which isn't true. The compiler responds by not adding a 'return' instruction, so control simply moves on to whatever bytes come afterwards in memory. In my testing, that was the ralink_restart() function. This means that an attempt to 'halt' the machine would actually cause a reboot. So remove ralink_halt() so that a 'halt' really does halt. Fixes: c06e836ada59 ("MIPS: ralink: adds reset code") Signed-off-by: NeilBrown Cc: John Crispin Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 3.9+ Patchwork: https://patchwork.linux-mips.org/patch/18851/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/reset.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/mips/ralink/reset.c b/arch/mips/ralink/reset.c index 64543d66e76b..e9531fea23a2 100644 --- a/arch/mips/ralink/reset.c +++ b/arch/mips/ralink/reset.c @@ -96,16 +96,9 @@ static void ralink_restart(char *command) unreachable(); } -static void ralink_halt(void) -{ - local_irq_disable(); - unreachable(); -} - static int __init mips_reboot_setup(void) { _machine_restart = ralink_restart; - _machine_halt = ralink_halt; return 0; } From 055c49dcf10fe7db6087037abb87728e7dc84192 Mon Sep 17 00:00:00 2001 From: Michael Nosthoff Date: Fri, 9 Mar 2018 10:02:45 +0100 Subject: [PATCH 02/68] iio: st_pressure: st_accel: pass correct platform data to init commit 8b438686a001db64c21782d04ef68111e53c45d9 upstream. 
Commit 7383d44b added a pointer pdata which gets set to the default platform_data when none was defined in the device. But it did not pass this pointer to the st_sensors_init_sensor call but still used the maybe uninitialized platform_data from dev. This breaks initialization when no platform_data is given and the optional st,drdy-int-pin devicetree option is not set. This commit fixes this. Cc: stable@vger.kernel.org Fixes: 7383d44b ("iio: st_pressure: st_accel: Initialise sensor platform data properly") Signed-off-by: Michael Nosthoff Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/st_accel_core.c | 2 +- drivers/iio/pressure/st_pressure_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index fe94415a0bc7..32cd64c2ee06 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -858,7 +858,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev) if (!pdata) pdata = (struct st_sensors_platform_data *)&default_accel_pdata; - err = st_sensors_init_sensor(indio_dev, adata->dev->platform_data); + err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) goto st_accel_power_off; diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index bec60299b6ec..3458418d88bc 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -678,7 +678,7 @@ int st_press_common_probe(struct iio_dev *indio_dev) if (!pdata && press_data->sensor_settings->drdy_irq.addr) pdata = (struct st_sensors_platform_data *)&default_press_pdata; - err = st_sensors_init_sensor(indio_dev, press_data->dev->platform_data); + err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) goto st_press_power_off; From b1d25da581e5f36c5a6cea0fca7db3bb2b6573ad Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Mon, 19 Mar 2018 07:11:08 +0100 Subject: [PATCH 
03/68] ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit commit a6618f4aedb2b60932d766bd82ae7ce866e842aa upstream. Currently, the offsets in the UAC2 processing unit descriptor are calculated incorrectly. It causes an issue when connecting the device which provides such a feature: ~~~~ [84126.724420] usb 1-1.3.1: invalid Processing Unit descriptor (id 18) ~~~~ After this patch is applied, the UAC2 processing unit inits w/o this error. Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0") Signed-off-by: Kirill Marinushkin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/audio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index d2314be4f0c0..19f9dc2c06f6 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -369,7 +369,7 @@ static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_d { return (protocol == UAC_VERSION_1) ? desc->baSourceID[desc->bNrInPins + 4] : - desc->baSourceID[desc->bNrInPins + 6]; + 2; /* in UAC2, this value is constant */ } static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc, @@ -377,7 +377,7 @@ static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_de { return (protocol == UAC_VERSION_1) ? &desc->baSourceID[desc->bNrInPins + 5] : - &desc->baSourceID[desc->bNrInPins + 7]; + &desc->baSourceID[desc->bNrInPins + 6]; } static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc, From d44f3ad7b06b84cff1c9f74efa3e4305c7b3e177 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Mar 2018 08:56:06 +0100 Subject: [PATCH 04/68] ALSA: aloop: Sync stale timer before release commit 67a01afaf3d34893cf7d2ea19b34555d6abb7cb0 upstream. 
The aloop driver tries to stop the pending timer via del_timer() in the trigger callback and in the close callback. The former is correct, as it's an atomic operation, while the latter expects that the timer gets really removed and proceeds the resource releases after that. But del_timer() doesn't synchronize, hence the running timer may still access the released resources. A similar situation can be also seen in the prepare callback after trigger(STOP) where the prepare tries to re-initialize the things while a timer is still running. The problems like the above are seen indirectly in some syzkaller reports (although it's not 100% clear whether this is the only cause, as the race condition is quite narrow and not always easy to trigger). For addressing these issues, this patch adds the explicit calls of del_timer_sync() in some places, so that the pending timer is properly killed / synced. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index cbd20cb8ca11..610d36bce2c2 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -192,6 +192,11 @@ static inline void loopback_timer_stop(struct loopback_pcm *dpcm) dpcm->timer.expires = 0; } +static inline void loopback_timer_stop_sync(struct loopback_pcm *dpcm) +{ + del_timer_sync(&dpcm->timer); +} + #define CABLE_VALID_PLAYBACK (1 << SNDRV_PCM_STREAM_PLAYBACK) #define CABLE_VALID_CAPTURE (1 << SNDRV_PCM_STREAM_CAPTURE) #define CABLE_VALID_BOTH (CABLE_VALID_PLAYBACK|CABLE_VALID_CAPTURE) @@ -326,6 +331,8 @@ static int loopback_prepare(struct snd_pcm_substream *substream) struct loopback_cable *cable = dpcm->cable; int bps, salign; + loopback_timer_stop_sync(dpcm); + salign = (snd_pcm_format_width(runtime->format) * runtime->channels) / 8; bps = salign * runtime->rate; @@ -745,7 +752,7 @@ static int loopback_close(struct snd_pcm_substream *substream) 
struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; - loopback_timer_stop(dpcm); + loopback_timer_stop_sync(dpcm); mutex_lock(&loopback->cable_lock); free_cable(substream); mutex_unlock(&loopback->cable_lock); From 789697007799297173bfb388cc7a71417b31273a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Mar 2018 10:40:27 +0100 Subject: [PATCH 05/68] ALSA: aloop: Fix access to not-yet-ready substream via cable commit 8e6b1a72a75bb5067ccb6b56d8ca4aa3a300a64e upstream. In loopback_open() and loopback_close(), we assign and release the substream object to the corresponding cable in a racy way. It's neither locked nor done in the right position. The open callback assigns the substream before its preparation finishes, hence the other side of the cable may pick it up, which may lead to the invalid memory access. This patch addresses these: move the assignment to the end of the open callback, and wrap with cable->lock for avoiding concurrent accesses. 
Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/drivers/aloop.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 610d36bce2c2..dc91002d1e0d 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -666,7 +666,9 @@ static void free_cable(struct snd_pcm_substream *substream) return; if (cable->streams[!substream->stream]) { /* other stream is still alive */ + spin_lock_irq(&cable->lock); cable->streams[substream->stream] = NULL; + spin_unlock_irq(&cable->lock); } else { /* free the cable */ loopback->cables[substream->number][dev] = NULL; @@ -706,7 +708,6 @@ static int loopback_open(struct snd_pcm_substream *substream) loopback->cables[substream->number][dev] = cable; } dpcm->cable = cable; - cable->streams[substream->stream] = dpcm; snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); @@ -738,6 +739,11 @@ static int loopback_open(struct snd_pcm_substream *substream) runtime->hw = loopback_pcm_hardware; else runtime->hw = cable->hw; + + spin_lock_irq(&cable->lock); + cable->streams[substream->stream] = dpcm; + spin_unlock_irq(&cable->lock); + unlock: if (err < 0) { free_cable(substream); From ff0b03a460b12bcafa0cccd6c2dd8ce89340d986 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 17 Mar 2018 22:40:18 +0100 Subject: [PATCH 06/68] ALSA: hda/realtek - Always immediately update mute LED with pin VREF commit e40bdb03d3cd7da66bd0bc1e40cbcfb49351265c upstream. Some HP laptops have a mute LED controlled by a pin VREF. The Realtek codec driver updates the VREF via vmaster hook by calling snd_hda_set_pin_ctl_cache(). This works fine as long as the driver is running in a normal mode. However, when the VREF change happens during the codec being in runtime PM suspend, the regmap access will skip and postpone the actual register change. 
This ends up with the unchanged LED status until the next runtime PM resume even if you change the Master mute switch. (Interestingly, the machine keeps the LED status even after the codec goes into D3 -- but it's another story.) For improving this usability, let the driver temporarily power up / down only during the pin VREF change. This can be achieved easily by wrapping the call with snd_hda_power_up_pm() / *_down_pm(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199073 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index dae0021f39c3..e2230bed7409 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3261,8 +3261,12 @@ static void alc269_fixup_mic_mute_hook(void *private_data, int enabled) pinval = snd_hda_codec_get_pin_target(codec, spec->mute_led_nid); pinval &= ~AC_PINCTL_VREFEN; pinval |= enabled ? AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_80; - if (spec->mute_led_nid) + if (spec->mute_led_nid) { + /* temporarily power up/down for setting VREF */ + snd_hda_power_up_pm(codec); snd_hda_set_pin_ctl_cache(codec, spec->mute_led_nid, pinval); + snd_hda_power_down_pm(codec); + } } /* Make sure the led works even in runtime suspend */ From d8963938399a4b066448dd2a8dd100fb4e91fef8 Mon Sep 17 00:00:00 2001 From: Evgeniy Didin Date: Wed, 14 Mar 2018 22:30:51 +0300 Subject: [PATCH 07/68] mmc: dw_mmc: fix falling from idmac to PIO mode when dw_mci_reset occurs commit 47b7de2f6c18f75d1f2716efe752cba43f32a626 upstream. It was found that in IDMAC mode after soft-reset driver switches to PIO mode. That's what happens in case of DTO timeout overflow calculation failure: 1. soft-reset is called 2. driver restarts dma 3. descriptors states are checked, one of descriptor is owned by the IDMAC. 4. driver can't use DMA and then switches to PIO mode. 
Failure was already fixed in: https://www.spinics.net/lists/linux-mmc/msg48125.html. Behaviour while soft-reset is not something we expect or even want to happen. So we switch from dw_mci_idmac_reset to dw_mci_idmac_init, so descriptors are cleaned before starting dma. And while at it explicitly zero des0 which otherwise might contain garbage as being allocated by dmam_alloc_coherent(). Signed-off-by: Evgeniy Didin Cc: Jaehoon Chung Cc: Ulf Hansson Cc: Andy Shevchenko Cc: Jisheng Zhang Cc: Shawn Lin Cc: Alexey Brodkin Cc: Eugeniy Paltsev Cc: linux-snps-arc@lists.infradead.org Cc: # 4.4+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index f81f4175f49a..d382dbd44635 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -490,6 +490,7 @@ static int dw_mci_idmac_init(struct dw_mci *host) (sizeof(struct idmac_desc_64addr) * (i + 1))) >> 32; /* Initialize reserved and buffer size fields to "0" */ + p->des0 = 0; p->des1 = 0; p->des2 = 0; p->des3 = 0; @@ -512,6 +513,7 @@ static int dw_mci_idmac_init(struct dw_mci *host) i++, p++) { p->des3 = cpu_to_le32(host->sg_dma + (sizeof(struct idmac_desc) * (i + 1))); + p->des0 = 0; p->des1 = 0; } @@ -2878,8 +2880,8 @@ static bool dw_mci_reset(struct dw_mci *host) } if (host->use_dma == TRANS_MODE_IDMAC) - /* It is also recommended that we reset and reprogram idmac */ - dw_mci_idmac_reset(host); + /* It is also required that we reinit idmac */ + dw_mci_idmac_init(host); ret = true; From d2327a25e43a7843d65f5bab3daeb833c5caf63f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Mar 2018 11:36:33 +0100 Subject: [PATCH 08/68] PCI: Add function 1 DMA alias quirk for Highpoint RocketRAID 644L commit 1903be8222b7c278ca897c129ce477c1dd6403a8 upstream. 
The Highpoint RocketRAID 644L uses a Marvell 88SE9235 controller, as with other Marvell controllers this needs a function 1 DMA alias quirk. Note the RocketRAID 642L uses the same Marvell 88SE9235 controller and already is listed with a function 1 DMA alias quirk. Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1534106 Signed-off-by: Hans de Goede Acked-by: Bjorn Helgaas Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 4c9fb8b323e8..fb177dc576d6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3877,6 +3877,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230, quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645, + quirk_dma_func1_alias); /* https://bugs.gentoo.org/show_bug.cgi?id=497630 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB388_ESD, From 3ba5143bbbed8bd0ab2ee85a46f1165ff3039232 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Mar 2018 11:36:32 +0100 Subject: [PATCH 09/68] ahci: Add PCI-id for the Highpoint Rocketraid 644L card commit 28b2182dad43f6f8fcbd167539a26714fd12bd64 upstream. Like the Highpoint Rocketraid 642L and cards using a Marvell 88SE9235 controller in general, this RAID card also supports AHCI mode and short of a custom driver, this is the only way to make it work under Linux. Note that even though the card is called the 644L, it has a product-id of 0x0645. 
Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1534106 Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Acked-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 9b46ef4c851e..4d4b5f607b81 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -539,7 +539,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { .driver_data = board_ahci_yes_fbs }, { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9230), .driver_data = board_ahci_yes_fbs }, - { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), + { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), /* highpoint rocketraid 642L */ + .driver_data = board_ahci_yes_fbs }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0645), /* highpoint rocketraid 644L */ .driver_data = board_ahci_yes_fbs }, /* Promise */ From beb9ece1db9cabbe01b297e7df2e4d6769dad8bc Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 8 Feb 2018 14:43:35 +0100 Subject: [PATCH 10/68] clk: bcm2835: Fix ana->maskX definitions commit 49012d1bf5f78782d398adb984a080a88ba42965 upstream. ana->maskX values are already '~'-ed in bcm2835_pll_set_rate(). Remove the '~' in the definition to fix ANA setup. Note that this commit fixes a long standing bug preventing one from using an HDMI display if it's plugged after the FW has booted Linux. This is because PLLH is used by the HDMI encoder to generate the pixel clock. 
Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-bcm2835.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 2acaa77ad482..48e7040adb63 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -401,17 +401,17 @@ struct bcm2835_pll_ana_bits { static const struct bcm2835_pll_ana_bits bcm2835_ana_default = { .mask0 = 0, .set0 = 0, - .mask1 = (u32)~(A2W_PLL_KI_MASK | A2W_PLL_KP_MASK), + .mask1 = A2W_PLL_KI_MASK | A2W_PLL_KP_MASK, .set1 = (2 << A2W_PLL_KI_SHIFT) | (8 << A2W_PLL_KP_SHIFT), - .mask3 = (u32)~A2W_PLL_KA_MASK, + .mask3 = A2W_PLL_KA_MASK, .set3 = (2 << A2W_PLL_KA_SHIFT), .fb_prediv_mask = BIT(14), }; static const struct bcm2835_pll_ana_bits bcm2835_ana_pllh = { - .mask0 = (u32)~(A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK), + .mask0 = A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK, .set0 = (2 << A2W_PLLH_KA_SHIFT) | (2 << A2W_PLLH_KI_LOW_SHIFT), - .mask1 = (u32)~(A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK), + .mask1 = A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK, .set1 = (6 << A2W_PLLH_KP_SHIFT), .mask3 = 0, .set3 = 0, From 8f0dd27b3db01e7785038ea9a711f210067ee6ae Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 8 Feb 2018 14:43:36 +0100 Subject: [PATCH 11/68] clk: bcm2835: Protect sections updating shared registers commit 7997f3b2df751aab0b8e60149b226a32966c41ac upstream. CM_PLLx and A2W_XOSC_CTRL registers are accessed by different clock handlers and must be accessed with ->regs_lock held. Update the sections where this protection is missing. 
Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-bcm2835.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 48e7040adb63..abdc149941e2 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -566,8 +566,10 @@ static int bcm2835_pll_on(struct clk_hw *hw) ~A2W_PLL_CTRL_PWRDN); /* Take the PLL out of reset. */ + spin_lock(&cprman->regs_lock); cprman_write(cprman, data->cm_ctrl_reg, cprman_read(cprman, data->cm_ctrl_reg) & ~CM_PLL_ANARST); + spin_unlock(&cprman->regs_lock); /* Wait for the PLL to lock. */ timeout = ktime_add_ns(ktime_get(), LOCK_TIMEOUT_NS); @@ -644,9 +646,11 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, } /* Unmask the reference clock from the oscillator. */ + spin_lock(&cprman->regs_lock); cprman_write(cprman, A2W_XOSC_CTRL, cprman_read(cprman, A2W_XOSC_CTRL) | data->reference_enable_mask); + spin_unlock(&cprman->regs_lock); if (do_ana_setup_first) bcm2835_pll_write_ana(cprman, data->ana_reg_base, ana); From bdbd9153899061cc9aacfc376b26a2224850c61a Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 17 Feb 2018 21:05:04 +0800 Subject: [PATCH 12/68] clk: sunxi-ng: a31: Fix CLK_OUT_* clock ops commit 5682e268350f9eccdbb04006605c1b7068a7b323 upstream. When support for the A31/A31s CCU was first added, the clock ops for the CLK_OUT_* clocks was set to the wrong type. The clocks are MP-type, but the ops was set for div (M) clocks. This went unnoticed until now. This was because while they are different clocks, their data structures aligned in a way that ccu_div_ops would access the second ccu_div_internal and ccu_mux_internal structures, which were valid, if not incorrect. 
Furthermore, the use of these CLK_OUT_* was for feeding a precise 32.768 kHz clock signal to the WiFi chip. This was achievable by using the parent with the same clock rate and no divider. So the incorrect divider setting did not affect this usage. Commit 946797aa3f08 ("clk: sunxi-ng: Support fixed post-dividers on MP style clocks") added a new field to the ccu_mp structure, which broke the aforementioned alignment. Now the system crashes as div_ops tries to look up a nonexistent table. Reported-by: Philipp Rossak Tested-by: Philipp Rossak Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Cc: Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 9fe0939c1273..6ea5401e6881 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -750,7 +750,7 @@ static struct ccu_mp out_a_clk = { .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("out-a", clk_out_parents, - &ccu_div_ops, + &ccu_mp_ops, 0), }, }; @@ -771,7 +771,7 @@ static struct ccu_mp out_b_clk = { .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("out-b", clk_out_parents, - &ccu_div_ops, + &ccu_mp_ops, 0), }, }; @@ -792,7 +792,7 @@ static struct ccu_mp out_c_clk = { .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("out-c", clk_out_parents, - &ccu_div_ops, + &ccu_mp_ops, 0), }, }; From dd62bc3058313ef183aeb22d9de2297cb486dd8d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 15 Mar 2018 17:02:34 +0100 Subject: [PATCH 13/68] Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174 commit f44cb4b19ed40b655c2d422c9021ab2c2625adb6 upstream. 
The Atheros 1525/QCA6174 BT doesn't seem to be working properly on the recent kernels, as it tries to load a wrong firmware ar3k/AthrBT_0x00000200.dfu and it fails. This seems to have been a problem for some time, and the known workaround is to apply BTUSB_QCA_ROME quirk instead of BTUSB_ATH3012. The device in question is: T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=3004 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Bugzilla: http://bugzilla.opensuse.org/show_bug.cgi?id=1082504 Reported-by: Ivan Levshin Tested-by: Ivan Levshin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 
3257647d4f74..f8ba5c714df5 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -217,7 +217,6 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, @@ -250,6 +249,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ + { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, From 85f0fec12c58509b0e3717ad07427aa508a2d869 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Feb 2018 20:30:56 -0800 Subject: [PATCH 14/68] libata: fix length validation of ATAPI-relayed SCSI commands commit 058f58e235cbe03e923b30ea7c49995a46a8725f upstream. syzkaller reported a crash in ata_bmdma_fill_sg() when writing to /dev/sg1. The immediate cause was that the ATA command's scatterlist was not DMA-mapped, which causes 'pi - 1' to underflow, resulting in a write to 'qc->ap->bmdma_prd[0xffffffff]'. Strangely though, the flag ATA_QCFLAG_DMAMAP was set in qc->flags. The root cause is that when __ata_scsi_queuecmd() is preparing to relay a SCSI command to an ATAPI device, it doesn't correctly validate the CDB length before copying it into the 16-byte buffer 'cdb' in 'struct ata_queued_cmd'. 
Namely, it validates the fixed CDB length expected based on the SCSI opcode but not the actual CDB length, which can be larger due to the use of the SG_NEXT_CMD_LEN ioctl. Since 'flags' is the next member in ata_queued_cmd, a buffer overflow corrupts it. Fix it by requiring that the actual CDB length be <= 16 (ATAPI_CDB_LEN). [Really it seems the length should be required to be <= dev->cdb_len, but the current behavior seems to have been intentionally introduced by commit 607126c2a21c ("libata-scsi: be tolerant of 12-byte ATAPI commands in 16-byte CDBs") to work around a userspace bug in mplayer. Probably the workaround is no longer needed (mplayer was fixed in 2007), but continuing to allow lengths up to 16 appears harmless for now.] Here's a reproducer that works in QEMU when /dev/sg1 refers to the CD-ROM drive that qemu-system-x86_64 creates by default: #include #include #include #define SG_NEXT_CMD_LEN 0x2283 int main() { char buf[53] = { [36] = 0x7e, [52] = 0x02 }; int fd = open("/dev/sg1", O_RDWR); ioctl(fd, SG_NEXT_CMD_LEN, &(int){ 17 }); write(fd, buf, sizeof(buf)); } The crash was: BUG: unable to handle kernel paging request at ffff8cb97db37ffc IP: ata_bmdma_fill_sg drivers/ata/libata-sff.c:2623 [inline] IP: ata_bmdma_qc_prep+0xa4/0xc0 drivers/ata/libata-sff.c:2727 PGD fb6c067 P4D fb6c067 PUD 0 Oops: 0002 [#1] SMP CPU: 1 PID: 150 Comm: syz_ata_bmdma_q Not tainted 4.15.0-next-20180202 #99 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 [...] 
Call Trace: ata_qc_issue+0x100/0x1d0 drivers/ata/libata-core.c:5421 ata_scsi_translate+0xc9/0x1a0 drivers/ata/libata-scsi.c:2024 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4326 [inline] ata_scsi_queuecmd+0x8c/0x210 drivers/ata/libata-scsi.c:4375 scsi_dispatch_cmd+0xa2/0xe0 drivers/scsi/scsi_lib.c:1727 scsi_request_fn+0x24c/0x530 drivers/scsi/scsi_lib.c:1865 __blk_run_queue_uncond block/blk-core.c:412 [inline] __blk_run_queue+0x3a/0x60 block/blk-core.c:432 blk_execute_rq_nowait+0x93/0xc0 block/blk-exec.c:78 sg_common_write.isra.7+0x272/0x5a0 drivers/scsi/sg.c:806 sg_write+0x1ef/0x340 drivers/scsi/sg.c:677 __vfs_write+0x31/0x160 fs/read_write.c:480 vfs_write+0xa7/0x160 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0x4d/0xc0 fs/read_write.c:581 do_syscall_64+0x5e/0x110 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x21/0x86 Fixes: 607126c2a21c ("libata-scsi: be tolerant of 12-byte ATAPI commands in 16-byte CDBs") Reported-by: syzbot+1ff6f9fcc3c35f1c72a95e26528c8e7e3276e4da@syzkaller.appspotmail.com Cc: # v2.6.24+ Signed-off-by: Eric Biggers Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e3e10e8f6f6a..64c8f9f46ddb 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4177,7 +4177,9 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { /* relay SCSI command to ATAPI device */ int len = COMMAND_SIZE(scsi_op); - if (unlikely(len > scmd->cmd_len || len > dev->cdb_len)) + if (unlikely(len > scmd->cmd_len || + len > dev->cdb_len || + scmd->cmd_len > ATAPI_CDB_LEN)) goto bad_cdb_len; xlat_func = atapi_xlat; From 195c71dc031bed7890e39d9571d8f9e2acdb6a5b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Feb 2018 20:33:27 -0800 Subject: [PATCH 15/68] libata: remove WARN() for 
DMA or PIO command without data commit 9173e5e80729c8434b8d27531527c5245f4a5594 upstream. syzkaller hit a WARN() in ata_qc_issue() when writing to /dev/sg0. This happened because it issued a READ_6 command with no data buffer. Just remove the WARN(), as it doesn't appear to indicate a kernel bug. The expected behavior is to fail the command, which the code does. Here's a reproducer that works in QEMU when /dev/sg0 refers to a disk of the default type ("82371SB PIIX3 IDE"): #include #include int main() { char buf[42] = { [36] = 0x8 /* READ_6 */ }; write(open("/dev/sg0", O_RDWR), buf, sizeof(buf)); } Fixes: f92a26365a72 ("libata: change ATA_QCFLAG_DMAMAP semantics") Reported-by: syzbot+f7b556d1766502a69d85071d2ff08bd87be53d0f@syzkaller.appspotmail.com Cc: # v2.6.25+ Signed-off-by: Eric Biggers Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index aee39524375c..233040a3f1ad 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5265,8 +5265,7 @@ void ata_qc_issue(struct ata_queued_cmd *qc) * We guarantee to LLDs that they will have at least one * non-zero sg if the command is a data command. */ - if (WARN_ON_ONCE(ata_is_data(prot) && - (!qc->sg || !qc->n_elem || !qc->nbytes))) + if (ata_is_data(prot) && (!qc->sg || !qc->n_elem || !qc->nbytes)) goto sys_err; if (ata_is_dma(prot) || (ata_is_pio(prot) && From 8b8524d75697da34116c3fd42103b67f69f3dcc8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Feb 2018 20:33:51 -0800 Subject: [PATCH 16/68] libata: don't try to pass through NCQ commands to non-NCQ devices commit 2c1ec6fda2d07044cda922ee25337cf5d4b429b3 upstream. syzkaller hit a WARN() in ata_bmdma_qc_issue() when writing to /dev/sg0. 
This happened because it issued an ATA pass-through command (ATA_16) where the protocol field indicated that NCQ should be used -- but the device did not support NCQ. We could just remove the WARN() from libata-sff.c, but the real problem seems to be that the SCSI -> ATA translation code passes through NCQ commands without verifying that the device actually supports NCQ. Fix this by adding the appropriate check to ata_scsi_pass_thru(). Here's a reproducer that works in QEMU when /dev/sg0 refers to a disk of the default type ("82371SB PIIX3 IDE"): #include <fcntl.h> #include <unistd.h> int main() { char buf[53] = { 0 }; buf[36] = 0x85; /* ATA_16 */ buf[37] = (12 << 1); /* FPDMA */ buf[38] = 0x1; /* Has data */ buf[51] = 0xC8; /* ATA_CMD_READ */ write(open("/dev/sg0", O_RDWR), buf, sizeof(buf)); } Fixes: ee7fb331c3ac ("libata: add support for NCQ commands for SG interface") Reported-by: syzbot+2f69ca28df61bdfc77cd36af2e789850355a221e@syzkaller.appspotmail.com Cc: # v4.4+ Signed-off-by: Eric Biggers Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 64c8f9f46ddb..9babbc845750 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3226,6 +3226,12 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } + /* We may not issue NCQ commands to devices not supporting NCQ */ + if (ata_is_ncq(tf->protocol) && !ata_ncq_enabled(dev)) { + fp = 1; + goto invalid_fld; + } + /* sanity check for pio multi commands */ if ((cdb[1] & 0xe0) && !is_multi_taskfile(tf)) { fp = 1; From a9f062b850db21be39afb2cf22b8eca42ec1e314 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2018 10:48:20 +0100 Subject: [PATCH 17/68] libata: Apply NOLPM quirk to Crucial MX100 512GB SSDs commit 9c7be59fc519af9081c46c48f06f2b8fadf55ad8 upstream.
Various people have reported the Crucial MX100 512GB model not working with LPM set to min_power. I've now received a report that it also does not work with the new med_power_with_dipm level. It does work with medium_power, but that has no measurable power-savings and given the amount of people being bitten by the other levels not working, this commit just disables LPM altogether. Note all reporters of this have either the 512GB model (max capacity), or are not specifying their SSD's size. So for now this quirk assumes this is a problem with the 512GB model only. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=89261 Buglink: https://github.com/linrunner/TLP/issues/84 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 233040a3f1ad..6b90bf756c7d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4403,6 +4403,11 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER }, { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ + { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From 2bcfcae486246875cfb9835cee24955fbfc0c17a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 18 Feb 2018 22:17:09 +0800 Subject: [PATCH 18/68] libata: disable LPM for Crucial BX100 SSD 500GB drive commit b17e5729a630d8326a48ec34ef02e6b4464a6aef upstream. After Laptop Mode Tools starts to use min_power for LPM, a user found out Crucial BX100 SSD can't get mounted. 
Crucial BX100 SSD 500GB drive doesn't work well with min_power. This also happens with med_power_with_dipm. So let's disable LPM for Crucial BX100 SSD 500GB drive. BugLink: https://bugs.launchpad.net/bugs/1726930 Signed-off-by: Kai-Heng Feng Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6b90bf756c7d..ac35d5738ba9 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4403,6 +4403,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER }, { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + /* Crucial BX100 SSD 500GB has broken LPM support */ + { "CT500BX100SSD1", "MU02", ATA_HORKAGE_NOLPM }, + /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | From db4a121a8939ec1d24efe38e3405dc9776c6b925 Mon Sep 17 00:00:00 2001 From: Ju Hyung Park Date: Sun, 11 Mar 2018 02:28:35 +0900 Subject: [PATCH 19/68] libata: Enable queued TRIM for Samsung SSD 860 commit ca6bfcb2f6d9deab3924bf901e73622a94900473 upstream. Samsung explicitly states that queued TRIM is supported for Linux with 860 PRO and 860 EVO. Make the previous blacklist cover only the 840 and 850 series. Signed-off-by: Park Ju Hyung Reviewed-by: Martin K.
Petersen Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ac35d5738ba9..1f4810ea33c5 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4422,7 +4422,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From a7262d24ab62e92728b7c3fbbd85b718a81ce865 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Mar 2018 16:33:58 +0100 Subject: [PATCH 20/68] libata: Apply NOLPM quirk to Crucial M500 480 and 960GB SSDs commit 62ac3f7305470e3f52f159de448bc1a771717e88 upstream. There have been reports of the Crucial M500 480GB model not working with LPM set to min_power / med_power_with_dipm level. It has not been tested with medium_power, but that typically has no measurable power-savings. Note the reporter's Crucial_CT480M500SSD3 has a firmware version of MU03 and there is an MU05 update available, but that update does not mention any LPM fixes in its changelog, so the quirk matches all firmware versions. In my experience the LPM problems with (older) Crucial SSDs seem to be limited to higher capacity versions of the SSDs (different firmware?), so this commit adds a NOLPM quirk for the 480 and 960GB versions of the M500, to avoid LPM causing issues with these SSDs.
Cc: stable@vger.kernel.org Reported-and-tested-by: Martin Steigerwald Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1f4810ea33c5..cf84a52ed20e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4411,6 +4411,14 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, + /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */ + { "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + { "Crucial_CT960M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From ed57941c84c4d8c5d30998c3832e0d20deaf0cfe Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Mar 2018 16:33:59 +0100 Subject: [PATCH 21/68] libata: Make Crucial BX100 500GB LPM quirk apply to all firmware versions commit 3bf7b5d6d017c27e0d3b160aafb35a8e7cfeda1f upstream. Commit b17e5729a630 ("libata: disable LPM for Crucial BX100 SSD 500GB drive") introduced an ATA_HORKAGE_NOLPM quirk for Crucial BX100 500GB SSDs but limited this to the MU02 firmware version. According to http://www.crucial.com/usa/en/support-ssd-firmware MU02 is the last version, so there are no newer, possibly fixed versions, and if the MU02 version has broken LPM then the MU01 almost certainly also has broken LPM, so this commit changes the quirk to apply to all firmware versions.
Fixes: b17e5729a630 ("libata: disable LPM for Crucial BX100 SSD 500GB...") Cc: stable@vger.kernel.org Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cf84a52ed20e..b41d1a7b7c9d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4404,7 +4404,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, /* Crucial BX100 SSD 500GB has broken LPM support */ - { "CT500BX100SSD1", "MU02", ATA_HORKAGE_NOLPM }, + { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM }, /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | From 8d7a2a6d455cea5b80b584a6df147191ba0043ad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Mar 2018 16:34:00 +0100 Subject: [PATCH 22/68] libata: Modify quirks for MX100 to limit NCQ_TRIM quirk to MU01 version commit d418ff56b8f2d2b296daafa8da151fe27689b757 upstream. When commit 9c7be59fc519af ("libata: Apply NOLPM quirk to Crucial MX100 512GB SSDs") was added it inherited the ATA_HORKAGE_NO_NCQ_TRIM quirk from the existing "Crucial_CT*MX100*" entry, but that entry sets model_rev to "MU01", whereas the entry adding the NOLPM quirk sets it to NULL. This means that after that commit we now apply the NO_NCQ_TRIM quirk to all "Crucial_CT512MX100*" SSDs even if they have the fixed "MU02" firmware. This commit splits the "Crucial_CT512MX100*" quirk into 2 quirks, one for the "MU01" firmware and one for all other firmware versions, so that we once again only apply the NO_NCQ_TRIM quirk to the "MU01" firmware version. Fixes: 9c7be59fc519af ("libata: Apply NOLPM quirk to ...
MX100 512GB SSDs") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b41d1a7b7c9d..e08c09fa5da0 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4406,10 +4406,13 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Crucial BX100 SSD 500GB has broken LPM support */ { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM }, - /* The 512GB version of the MX100 has both queued TRIM and LPM issues */ - { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */ + { "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, + /* 512GB MX100 with newer firmware has only LPM issues */ + { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */ { "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | From e294c4c2d33b7307a89cdf31bec08a112d6a9297 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Mar 2018 11:32:02 -0400 Subject: [PATCH 23/68] nfsd: remove blocked locks on client teardown commit 68ef3bc3166468678d5e1fdd216628c35bd1186f upstream. We had some reports of panics in nfsd4_lm_notify, and that showed a nfs4_lockowner that had outlived its so_client. Ensure that we walk any leftover lockowners after tearing down all of the stateids, and remove any blocked locks that they hold. With this change, we also don't need to walk the nbl_lru on nfsd_net shutdown, as that will happen naturally when we tear down the clients. Fixes: 76d348fadff5 (nfsd: have nfsd4_lock use blocking locks for v4.1+ locks) Reported-by: Frank Sorenson Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.9 Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 62 +++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f463c4e0b2ea..12d780718b48 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -263,6 +263,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl) kfree(nbl); } +static void +remove_blocked_locks(struct nfs4_lockowner *lo) +{ + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd4_blocked_lock *nbl; + LIST_HEAD(reaplist); + + /* Dequeue all blocked locks */ + spin_lock(&nn->blocked_locks_lock); + while (!list_empty(&lo->lo_blocked)) { + nbl = list_first_entry(&lo->lo_blocked, + struct nfsd4_blocked_lock, + nbl_list); + list_del_init(&nbl->nbl_list); + list_move(&nbl->nbl_lru, &reaplist); + } + spin_unlock(&nn->blocked_locks_lock); + + /* Now free them */ + while (!list_empty(&reaplist)) { + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, + nbl_lru); + list_del_init(&nbl->nbl_lru); + posix_unblock_lock(&nbl->nbl_lock); + free_blocked_lock(nbl); + } +} + static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -1854,6 +1883,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; @@ -1883,6 +1913,16 @@ __destroy_client(struct nfs4_client *clp) nfs4_get_stateowner(&oo->oo_owner); release_openowner(oo); } + for (i = 0; i < OWNER_HASH_SIZE; i++) { + struct nfs4_stateowner *so, *tmp; + + list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i], + so_strhash) { + /* Should be no openowners at this point */ + WARN_ON_ONCE(so->so_is_open_owner); + remove_blocked_locks(lockowner(so)); + } + } nfsd4_return_all_client_layouts(clp); 
nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) @@ -6266,6 +6306,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); + remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); return status; @@ -7051,6 +7092,8 @@ nfs4_state_destroy_net(struct net *net) } } + WARN_ON(!list_empty(&nn->blocked_locks_lru)); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->unconf_id_hashtbl[i])) { clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); @@ -7117,7 +7160,6 @@ nfs4_state_shutdown_net(struct net *net) struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd4_blocked_lock *nbl; cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -7138,24 +7180,6 @@ nfs4_state_shutdown_net(struct net *net) nfs4_put_stid(&dp->dl_stid); } - BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->blocked_locks_lock); - while (!list_empty(&nn->blocked_locks_lru)) { - nbl = list_first_entry(&nn->blocked_locks_lru, - struct nfsd4_blocked_lock, nbl_lru); - list_move(&nbl->nbl_lru, &reaplist); - list_del_init(&nbl->nbl_list); - } - spin_unlock(&nn->blocked_locks_lock); - - while (!list_empty(&reaplist)) { - nbl = list_first_entry(&reaplist, - struct nfsd4_blocked_lock, nbl_lru); - list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); - free_blocked_lock(nbl); - } - nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); } From 9c7f7bdb1932f8c1e5f80d32c717184701afe701 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 22 Mar 2018 16:17:20 -0700 Subject: [PATCH 24/68] mm/vmalloc: add interfaces to free unmapped page table commit b6bdb7517c3d3f41f20e5c2948d6bc3f8897394e upstream. On architectures with CONFIG_HAVE_ARCH_HUGE_VMAP set, ioremap() may create pud/pmd mappings. 
A kernel panic was observed on arm64 systems with Cortex-A75 in the following steps as described by Hanjun Guo. 1. ioremap a 4K size, valid page table will build, 2. iounmap it, pte0 will set to 0; 3. ioremap the same address with 2M size, pgd/pmd is unchanged, then set a new value for pmd; 4. pte0 is leaked; 5. CPU may meet exception because the old pmd is still in TLB, which will lead to kernel panic. This panic is not reproducible on x86. INVLPG, called from iounmap, purges all levels of entries associated with the purged address on x86. x86 still has a memory leak. The patch changes the ioremap path to free unmapped page table(s) since doing so in the unmap path has the following issues: - The iounmap() path is shared with vunmap(). Since vmap() only supports pte mappings, making vunmap() free a pte page is an overhead for regular vmap users as they do not need a pte page freed up. - Checking if all entries in a pte page are cleared in the unmap path is racy, and serializing this check is expensive. - The unmap path calls free_vmap_area_noflush() to do lazy TLB purges. Clearing a pud/pmd entry before the lazy TLB purges needs an extra TLB purge. Add two interfaces, pud_free_pmd_page() and pmd_free_pte_page(), which clear a given pud/pmd entry and free up a page for the lower level entries. This patch implements their stub functions on x86 and arm64, which work as a workaround. [akpm@linux-foundation.org: fix typo in pmd_free_pte_page() stub] Link: http://lkml.kernel.org/r/20180314180155.19492-2-toshi.kani@hpe.com Fixes: e61ce6ade404e ("mm: change ioremap to set up huge I/O mappings") Reported-by: Lei Li Signed-off-by: Toshi Kani Cc: Catalin Marinas Cc: Wang Xuefeng Cc: Will Deacon Cc: Hanjun Guo Cc: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H.
Peter Anvin" Cc: Borislav Petkov Cc: Matthew Wilcox Cc: Chintan Pandya Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 10 ++++++++++ arch/x86/mm/pgtable.c | 24 ++++++++++++++++++++++++ include/asm-generic/pgtable.h | 10 ++++++++++ lib/ioremap.c | 6 ++++-- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d5cc6d73c2c4..638f7f2bd79c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -772,3 +772,13 @@ int pmd_clear_huge(pmd_t *pmd) pmd_clear(pmd); return 1; } + +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 209b9465e97a..e478b105a103 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -643,4 +643,28 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } + +/** + * pud_free_pmd_page - Clear pud entry and free pmd page. + * @pud: Pointer to a PUD. + * + * Context: The pud range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pud_free_pmd_page(pud_t *pud) +{ + return pud_none(*pud); +} + +/** + * pmd_free_pte_page - Clear pmd entry and free pte page. + * @pmd: Pointer to a PMD. + * + * Context: The pmd range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. 
+ */ +int pmd_free_pte_page(pmd_t *pmd) +{ + return pmd_none(*pmd); +} #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index c4f8fd2fd384..f6ea0f3c03f8 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -764,6 +764,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); +int pud_free_pmd_page(pud_t *pud); +int pmd_free_pte_page(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { @@ -781,6 +783,14 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } +static inline int pud_free_pmd_page(pud_t *pud) +{ + return 0; +} +static inline int pmd_free_pte_page(pmd_t *pmd) +{ + return 0; +} #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE diff --git a/lib/ioremap.c b/lib/ioremap.c index 86c8911b0e3a..5323b59ca393 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -83,7 +83,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && - IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PMD_SIZE) && + pmd_free_pte_page(pmd)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -109,7 +110,8 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && - IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PUD_SIZE) && + pud_free_pmd_page(pud)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } From f4fe4f987ad9df650495102965ff86a950eff4e2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 22 Mar 2018 16:17:24 -0700 Subject: [PATCH 25/68] x86/mm: implement free pmd/pte page interfaces commit 
28ee90fe6048fa7b7ceaeb8831c0e4e454a4cf89 upstream. Implement pud_free_pmd_page() and pmd_free_pte_page() on x86, which clear a given pud/pmd entry and free up lower level page table(s). The address range associated with the pud/pmd entry must have been purged by INVLPG. Link: http://lkml.kernel.org/r/20180314180155.19492-3-toshi.kani@hpe.com Fixes: e61ce6ade404e ("mm: change ioremap to set up huge I/O mappings") Signed-off-by: Toshi Kani Reported-by: Lei Li Cc: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pgtable.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e478b105a103..b97ef29c940f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -653,7 +653,22 @@ int pmd_clear_huge(pmd_t *pmd) */ int pud_free_pmd_page(pud_t *pud) { - return pud_none(*pud); + pmd_t *pmd; + int i; + + if (pud_none(*pud)) + return 1; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_free_pte_page(&pmd[i])) + return 0; + + pud_clear(pud); + free_page((unsigned long)pmd); + + return 1; } /** @@ -665,6 +680,15 @@ int pud_free_pmd_page(pud_t *pud) */ int pmd_free_pte_page(pmd_t *pmd) { - return pmd_none(*pmd); + pte_t *pte; + + if (pmd_none(*pmd)) + return 1; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + free_page((unsigned long)pte); + + return 1; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ From 24284d5f53d0d8d4bc9321cfbd23f7bd63480059 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 22 Mar 2018 16:17:28 -0700 Subject: [PATCH 26/68] mm/khugepaged.c: convert VM_BUG_ON() to collapse fail commit fece2029a9e65b9a990831afe2a2b83290cbbe26 upstream. khugepaged is not yet able to convert PTE-mapped huge pages back to PMD mapped. 
We do not collapse such pages. See the check in khugepaged_scan_pmd(). But if between khugepaged_scan_pmd() and __collapse_huge_page_isolate() somebody managed to instantiate a THP in the range and then split the PMD back to PTEs we would have a problem -- VM_BUG_ON_PAGE(PageCompound(page)) will get triggered. It's possible since we drop mmap_sem during collapse to re-take it for write. Replace the VM_BUG_ON() with a graceful collapse failure. Link: http://lkml.kernel.org/r/20180315152353.27989-1-kirill.shutemov@linux.intel.com Fixes: b1caa957ae6d ("khugepaged: ignore pmd tables with THP mapped with ptes") Signed-off-by: Kirill A. Shutemov Cc: Laura Abbott Cc: Jerome Marchand Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 5d7c006373d3..898eb26f5dc8 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -528,7 +528,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } - VM_BUG_ON_PAGE(PageCompound(page), page); + /* TODO: teach khugepaged to collapse THP mapped with pte */ + if (PageCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out; + } + VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); From 142d9dda9e3b55c2b4604bd3d90f3db1c3219e55 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 22 Mar 2018 16:17:31 -0700 Subject: [PATCH 27/68] mm/thp: do not wait for lock_page() in deferred_split_scan() commit fa41b900c30b45fab03783724932dc30cd46a6be upstream. deferred_split_scan() gets called from the reclaim path. Waiting for the page lock may lead to deadlock there. Replace lock_page() with trylock_page() and skip the page if we failed to lock it. We will get to the page on the next scan.
Link: http://lkml.kernel.org/r/20180315150747.31945-1-kirill.shutemov@linux.intel.com Fixes: 9a982250f773 ("thp: introduce deferred_split_huge_page()") Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c234c078693c..e2982ea26090 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2279,11 +2279,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, list_for_each_safe(pos, next, &list) { page = list_entry((void *)pos, struct page, mapping); - lock_page(page); + if (!trylock_page(page)) + goto next; /* split_huge_page() removes page from list on success */ if (!split_huge_page(page)) split++; unlock_page(page); +next: put_page(page); } From 8ab899550b9a7ab2eebf84c1a694aafbd1e2121f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 22 Mar 2018 16:17:35 -0700 Subject: [PATCH 28/68] mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink() commit b3cd54b257ad95d344d121dc563d943ca39b0921 upstream. shmem_unused_huge_shrink() gets called from the reclaim path. Waiting for the page lock may lead to deadlock there. There was a bug report that may be attributed to this: http://lkml.kernel.org/r/alpine.LRH.2.11.1801242349220.30642@mail.ewheeler.net Replace lock_page() with trylock_page() and skip the page if we failed to lock it. We will get to the page on the next scan. We can test for the PageTransHuge() outside the page lock as we only need protection against splitting the page under us. Holding a pin on the page is enough for this. Link: http://lkml.kernel.org/r/20180316210830.43738-1-kirill.shutemov@linux.intel.com Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure") Signed-off-by: Kirill A.
Shutemov Reported-by: Eric Wheeler Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Tetsuo Handa Cc: Hugh Dickins Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 2123bfc39ef2..42ca5df2c0e3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -466,36 +466,45 @@ next: info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; - if (nr_to_split && split >= nr_to_split) { - iput(inode); - continue; - } + if (nr_to_split && split >= nr_to_split) + goto leave; - page = find_lock_page(inode->i_mapping, + page = find_get_page(inode->i_mapping, (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); if (!page) goto drop; + /* No huge page at the end of the file: nothing to split */ if (!PageTransHuge(page)) { - unlock_page(page); put_page(page); goto drop; } + /* + * Leave the inode on the list if we failed to lock + * the page at this time. + * + * Waiting for the lock may lead to deadlock in the + * reclaim path. + */ + if (!trylock_page(page)) { + put_page(page); + goto leave; + } + ret = split_huge_page(page); unlock_page(page); put_page(page); - if (ret) { - /* split failed: leave it on the list */ - iput(inode); - continue; - } + /* If split failed leave the inode on the list */ + if (ret) + goto leave; split++; drop: list_del_init(&info->shrinklist); removed++; +leave: iput(inode); } From f0c88241d3526f9de39970e006a49d9c66650ab3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 21 Mar 2018 10:18:38 +0100 Subject: [PATCH 29/68] drm/vmwgfx: Fix a destoy-while-held mutex problem. commit 73a88250b70954a8f27c2444e1c2411bba3c29d9 upstream. When validating legacy surfaces, the backup bo might be destroyed at surface validate time. However, the kms resource validation code may have the bo reserved, so we will destroy a locked mutex. 
While there shouldn't be any other users of that mutex when it is destroyed, it causes a lock leak and thus throws a lockdep error. Fix this by having the kms resource validation code hold a reference to the bo while we have it reserved. We do this by introducing a validation context which might come in handy when the kms code is extended to validate multiple resources or buffers. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 28 +++++++++++++++++++--------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 12 +++++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 5 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 5 +++-- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index bf28ccc150df..87086af42114 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -27,7 +27,6 @@ #include "vmwgfx_kms.h" - /* Might need a hrtimer here? */ #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1) @@ -1933,9 +1932,12 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, * Helper to be used if an error forces the caller to undo the actions of * vmw_kms_helper_resource_prepare. */ -void vmw_kms_helper_resource_revert(struct vmw_resource *res) +void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx) { - vmw_kms_helper_buffer_revert(res->backup); + struct vmw_resource *res = ctx->res; + + vmw_kms_helper_buffer_revert(ctx->buf); + vmw_dmabuf_unreference(&ctx->buf); vmw_resource_unreserve(res, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -1952,10 +1954,14 @@ void vmw_kms_helper_resource_revert(struct vmw_resource *res) * interrupted by a signal. 
*/ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, - bool interruptible) + bool interruptible, + struct vmw_validation_ctx *ctx) { int ret = 0; + ctx->buf = NULL; + ctx->res = res; + if (interruptible) ret = mutex_lock_interruptible(&res->dev_priv->cmdbuf_mutex); else @@ -1974,6 +1980,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, res->dev_priv->has_mob); if (ret) goto out_unreserve; + + ctx->buf = vmw_dmabuf_reference(res->backup); } ret = vmw_resource_validate(res); if (ret) @@ -1981,7 +1989,7 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, return 0; out_revert: - vmw_kms_helper_buffer_revert(res->backup); + vmw_kms_helper_buffer_revert(ctx->buf); out_unreserve: vmw_resource_unreserve(res, false, NULL, 0); out_unlock: @@ -1997,11 +2005,13 @@ out_unlock: * @out_fence: Optional pointer to a fence pointer. If non-NULL, a * ref-counted fence pointer is returned here. */ -void vmw_kms_helper_resource_finish(struct vmw_resource *res, - struct vmw_fence_obj **out_fence) +void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, + struct vmw_fence_obj **out_fence) { - if (res->backup || out_fence) - vmw_kms_helper_buffer_finish(res->dev_priv, NULL, res->backup, + struct vmw_resource *res = ctx->res; + + if (ctx->buf || out_fence) + vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf, out_fence, NULL); vmw_resource_unreserve(res, false, NULL, 0); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index ff4803c107bc..2dd05395e98b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -183,6 +183,11 @@ struct vmw_display_unit { int set_gui_y; }; +struct vmw_validation_ctx { + struct vmw_resource *res; + struct vmw_dma_buffer *buf; +}; + #define vmw_crtc_to_du(x) \ container_of(x, struct vmw_display_unit, crtc) #define vmw_connector_to_du(x) \ @@ -233,9 +238,10 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, struct 
drm_vmw_fence_rep __user * user_fence_rep); int vmw_kms_helper_resource_prepare(struct vmw_resource *res, - bool interruptible); -void vmw_kms_helper_resource_revert(struct vmw_resource *res); -void vmw_kms_helper_resource_finish(struct vmw_resource *res, + bool interruptible, + struct vmw_validation_ctx *ctx); +void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx); +void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, struct vmw_fence_obj **out_fence); int vmw_kms_readback(struct vmw_private *dev_priv, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index f42359084adc..a6ca2185f5b0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -753,12 +753,13 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, struct vmw_framebuffer_surface *vfbs = container_of(framebuffer, typeof(*vfbs), base); struct vmw_kms_sou_surface_dirty sdirty; + struct vmw_validation_ctx ctx; int ret; if (!srf) srf = &vfbs->surface->res; - ret = vmw_kms_helper_resource_prepare(srf, true); + ret = vmw_kms_helper_resource_prepare(srf, true, &ctx); if (ret) return ret; @@ -777,7 +778,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips, dest_x, dest_y, num_clips, inc, &sdirty.base); - vmw_kms_helper_resource_finish(srf, out_fence); + vmw_kms_helper_resource_finish(&ctx, out_fence); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 94ad8d2acf9a..8b914504b857 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -977,12 +977,13 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, struct vmw_framebuffer_surface *vfbs = container_of(framebuffer, typeof(*vfbs), base); struct vmw_stdu_dirty sdirty; + struct vmw_validation_ctx ctx; int ret; if (!srf) srf = 
&vfbs->surface->res; - ret = vmw_kms_helper_resource_prepare(srf, true); + ret = vmw_kms_helper_resource_prepare(srf, true, &ctx); if (ret) return ret; @@ -1005,7 +1006,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, dest_x, dest_y, num_clips, inc, &sdirty.base); out_finish: - vmw_kms_helper_resource_finish(srf, out_fence); + vmw_kms_helper_resource_finish(&ctx, out_fence); return ret; } From e664e6d66380d53e5e468d0f1318df4516a3be25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 14 Mar 2018 18:14:04 +0100 Subject: [PATCH 30/68] drm/radeon: Don't turn off DP sink when disconnected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2681bc79eeb640562c932007bfebbbdc55bf6a7d upstream. Turning off the sink in this case causes various issues, because userspace expects it to stay on until it turns it off explicitly. Instead, turn the sink off and back on when a display is connected again. This dance seems necessary for link training to work correctly. 
Bugzilla: https://bugs.freedesktop.org/105308 Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_connectors.c | 29 ++++++++-------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index af0d7fd5706b..f416f5c2e8e9 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -90,25 +90,18 @@ void radeon_connector_hotplug(struct drm_connector *connector) /* don't do anything if sink is not display port, i.e., * passive dp->(dvi|hdmi) adaptor */ - if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { - int saved_dpms = connector->dpms; - /* Only turn off the display if it's physically disconnected */ - if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } else if (radeon_dp_needs_link_train(radeon_connector)) { - /* Don't try to start link training before we - * have the dpcd */ - if (!radeon_dp_getdpcd(radeon_connector)) - return; + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT && + radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && + radeon_dp_needs_link_train(radeon_connector)) { + /* Don't start link training before we have the DPCD */ + if (!radeon_dp_getdpcd(radeon_connector)) + return; - /* set it to OFF so that drm_helper_connector_dpms() - * won't return immediately since the current state - * is ON at this point. 
- */ - connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } - connector->dpms = saved_dpms; + /* Turn the connector off and back on immediately, which + * will trigger link training + */ + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } } } From 4ac9ab4f5f45d1ad0585c7bfa9ccff43b9984045 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Mar 2018 16:45:53 +0100 Subject: [PATCH 31/68] drm: udl: Properly check framebuffer mmap offsets commit 3b82a4db8eaccce735dffd50b4d4e1578099b8e8 upstream. The memmap options sent to the udl framebuffer driver were not being checked for all sets of possible crazy values. Fix this up by properly bounding the allowed values. Reported-by: Eyal Itkin Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180321154553.GA18454@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_fb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 611b6b9bb3cb..67ea2ce03a23 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -158,10 +158,15 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { unsigned long start = vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start; - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long offset; unsigned long page, pos; - if (offset + size > info->fix.smem_len) + if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) + return -EINVAL; + + offset = vma->vm_pgoff << PAGE_SHIFT; + + if (offset > info->fix.smem_len || size > info->fix.smem_len - offset) return -EINVAL; pos = (unsigned long)info->fix.smem_start + offset; From f33db316d0f531f18659a240ffeda9d910446370 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 15 Mar 2018 19:49:14 -0700 Subject: 
[PATCH 32/68] acpi, numa: fix pxm to online numa node associations commit dc9e0a9347e932e3fd3cd03e7ff241022ed6ea8a upstream. Commit 99759869faf1 "acpi: Add acpi_map_pxm_to_online_node()" added support for mapping a given proximity to its nearest, by SLIT distance, online node. However, it sometimes returns unexpected results due to the fact that it switches from comparing the PXM node to the last node that was closer than the current max. for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; node = n; <---- from this point we're using the wrong node for node_distance() Fixes: 99759869faf1 ("acpi: Add acpi_map_pxm_to_online_node()") Cc: Reviewed-by: Toshi Kani Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/numa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index ce3a7a16f03f..17b518cb787c 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -103,25 +103,27 @@ int acpi_map_pxm_to_node(int pxm) */ int acpi_map_pxm_to_online_node(int pxm) { - int node, n, dist, min_dist; + int node, min_node; node = acpi_map_pxm_to_node(pxm); if (node == NUMA_NO_NODE) node = 0; + min_node = node; if (!node_online(node)) { - min_dist = INT_MAX; + int min_dist = INT_MAX, dist, n; + for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; - node = n; + min_node = n; } } } - return node; + return min_node; } EXPORT_SYMBOL(acpi_map_pxm_to_online_node); From d0826ba87dedab9abd3f2628cc9fbee3f413bd4f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 Mar 2018 14:51:49 +0100 Subject: [PATCH 33/68] ACPI / watchdog: Fix off-by-one error at resource assignment commit b1abf6fc49829d89660c961fafe3f90f3d843c55 upstream. The resource allocation in WDAT watchdog has an off-by-one error; it sets one byte more than the actual end address.
This may eventually lead to unexpected resource conflicts. Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog) Cc: 4.9+ # 4.9+ Signed-off-by: Takashi Iwai Acked-by: Mika Westerberg Acked-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_watchdog.c | 4 ++-- drivers/watchdog/wdat_wdt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 13caebd679f5..ce8fc680785b 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -74,10 +74,10 @@ void __init acpi_watchdog_init(void) res.start = gas->address; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4); + res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width; + res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 6d1fbda0f461..0da9943d405f 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width; + r.end = r.start + gas->access_width - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { From 6fa877d2aca8da2f597c3a97355f7b4d8690dc1e Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 5 Mar 2018 16:56:13 -0700 Subject: [PATCH 34/68] libnvdimm, {btt, blk}: do integrity setup before add_disk() commit 3ffb0ba9b567a8efb9a04ed3d1ec15ff333ada22 upstream. 
Prior to 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") we needed to temporarily add a zero-capacity disk before registering for blk-integrity. But adding a zero-capacity disk caused the partition table scanning to bail early, and this resulted in partitions not coming up after a probe of the BTT or blk namespaces. We can now register for integrity before the disk has been added, and this fixes the rescan problems. Fixes: 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") Reported-by: Dariusz Dokupil Cc: Signed-off-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/blk.c | 3 +-- drivers/nvdimm/btt.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 9faaa9694d87..77db9795510f 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -286,8 +286,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); - set_capacity(disk, 0); - device_add_disk(dev, disk); if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) return -ENOMEM; @@ -300,6 +298,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + device_add_disk(dev, disk); revalidate_disk(disk); return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 7121453ec047..0c46ada027cf 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1392,8 +1392,6 @@ static int btt_blk_init(struct btt *btt) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); btt->btt_queue->queuedata = btt; - set_capacity(btt->btt_disk, 0); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk); if (btt_meta_size(btt)) { int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); @@ -1405,6 +1403,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, 
btt->nlba * btt->sector_size >> 9); + device_add_disk(&btt->nd_btt->dev, btt->btt_disk); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); From 0a9be1b13232aa76c5561513415029da4e513f75 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Wed, 28 Feb 2018 21:15:20 +0100 Subject: [PATCH 35/68] brcmfmac: fix P2P_DEVICE ethernet address generation commit 455f3e76cfc0d893585a5f358b9ddbe9c1e1e53b upstream. The firmware has a requirement that the P2P_DEVICE address should be different from the address of the primary interface. When not specified by user-space, the driver generates the MAC address for the P2P_DEVICE interface using the MAC address of the primary interface and setting the locally administered bit. However, the MAC address of the primary interface may already have that bit set causing the creation of the P2P_DEVICE interface to fail with -EBUSY. Fix this by using a random address instead to determine the P2P_DEVICE address. Cc: stable@vger.kernel.org # 3.10.y Reported-by: Hans de Goede Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- .../broadcom/brcm80211/brcmfmac/p2p.c | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 85d949e03f79..f78d91b69287 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -462,25 +462,23 @@ static int brcmf_p2p_set_firmware(struct brcmf_if *ifp, u8 *p2p_mac) * @dev_addr: optional device address. * * P2P needs mac addresses for P2P device and interface. If no device - * address it specified, these are derived from the primary net device, ie. - * the permanent ethernet address of the device. 
+ * address it specified, these are derived from a random ethernet + * address. */ static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr) { - struct brcmf_if *pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp; - bool local_admin = false; + bool random_addr = false; - if (!dev_addr || is_zero_ether_addr(dev_addr)) { - dev_addr = pri_ifp->mac_addr; - local_admin = true; - } + if (!dev_addr || is_zero_ether_addr(dev_addr)) + random_addr = true; - /* Generate the P2P Device Address. This consists of the device's - * primary MAC address with the locally administered bit set. + /* Generate the P2P Device Address obtaining a random ethernet + * address with the locally administered bit set. */ - memcpy(p2p->dev_addr, dev_addr, ETH_ALEN); - if (local_admin) - p2p->dev_addr[0] |= 0x02; + if (random_addr) + eth_random_addr(p2p->dev_addr); + else + memcpy(p2p->dev_addr, dev_addr, ETH_ALEN); /* Generate the P2P Interface Address. If the discovery and connection * BSSCFGs need to simultaneously co-exist, then this address must be From 0e17fddb648afe74a4fc94789b7b70cf854e8903 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 22 Feb 2018 14:28:59 -0600 Subject: [PATCH 36/68] rtlwifi: rtl8723be: Fix loss of signal commit 78dc897b7ee67205423dbbc6b56be49fb18d15b5 upstream. In commit c713fb071edc ("rtlwifi: rtl8821ae: Fix connection lost problem correctly") a problem in rtl8821ae that caused loss of signal was fixed. That same problem has now been reported for rtl8723be. Accordingly, the ASPM L1 latency has been increased from 0 to 7 to fix the instability. 
Signed-off-by: Larry Finger Cc: Stable Tested-by: James Cameron Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index aba60c3145c5..618e509e75d6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -1125,7 +1125,8 @@ static void _rtl8723be_enable_aspm_back_door(struct ieee80211_hw *hw) /* Configuration Space offset 0x70f BIT7 is used to control L0S */ tmp8 = _rtl8723be_dbi_read(rtlpriv, 0x70f); - _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7)); + _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7) | + ASPM_L1_LATENCY << 3); /* Configuration Space offset 0x719 Bit3 is for L1 * BIT4 is for clock request From d434dae76129e2466dd1dab294ed0acc8e49939e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 17 Mar 2018 21:38:10 +0900 Subject: [PATCH 37/68] tracing: probeevent: Fix to support minus offset from symbol commit c5d343b6b7badd1f5fe0873eff2e8d63a193e732 upstream. In Documentation/trace/kprobetrace.txt, it says @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) However, the parser doesn't parse minus offset correctly, since commit 2fba0c8867af ("tracing/kprobes: Fix probe offset to be unsigned") drops minus ("-") offset support for kprobe probe address usage. This fixes the traceprobe_split_symbol_offset() to parse minus offset again with checking the offset range, and add a minus offset check in kprobe probe address usage. 
Link: http://lkml.kernel.org/r/152129028983.31874.13419301530285775521.stgit@devbox Cc: Ingo Molnar Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Ravi Bangoria Cc: stable@vger.kernel.org Fixes: 2fba0c8867af ("tracing/kprobes: Fix probe offset to be unsigned") Acked-by: Namhyung Kim Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_probe.c | 8 +++----- kernel/trace/trace_probe.h | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5ff45cae4ac4..ea3ed03fed7e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -607,7 +607,7 @@ static int create_trace_kprobe(int argc, char **argv) bool is_return = false, is_delete = false; char *symbol = NULL, *event = NULL, *group = NULL; char *arg; - unsigned long offset = 0; + long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; @@ -675,7 +675,7 @@ static int create_trace_kprobe(int argc, char **argv) symbol = argv[1]; /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); - if (ret) { + if (ret || offset < 0 || offset > UINT_MAX) { pr_info("Failed to parse either an address or a symbol.\n"); return ret; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8c0553d9afd3..5ea191b917e9 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -319,7 +319,7 @@ static fetch_func_t get_fetch_size_function(const struct fetch_type *type, } /* Split symbol and offset. 
*/ -int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) +int traceprobe_split_symbol_offset(char *symbol, long *offset) { char *tmp; int ret; @@ -327,13 +327,11 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) if (!offset) return -EINVAL; - tmp = strchr(symbol, '+'); + tmp = strpbrk(symbol, "+-"); if (tmp) { - /* skip sign because kstrtoul doesn't accept '+' */ - ret = kstrtoul(tmp + 1, 0, offset); + ret = kstrtol(tmp, 0, offset); if (ret) return ret; - *tmp = '\0'; } else *offset = 0; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0c0ae54d44c6..2b84c0de92c7 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -354,7 +354,7 @@ extern int traceprobe_conflict_field_name(const char *name, extern void traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); -extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset); +extern int traceprobe_split_symbol_offset(char *symbol, long *offset); extern ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, From 9eab80eded7bb767f2f0770b3380e6affd3dc2c5 Mon Sep 17 00:00:00 2001 From: OuYang ZhiZhong Date: Sun, 11 Mar 2018 15:59:07 +0800 Subject: [PATCH 38/68] mtdchar: fix usage of mtd_ooblayout_ecc() commit 6de564939e14327148e31ddcf769e34105176447 upstream. Section was not properly computed. The value of OOB region definition is always ECC section 0 information in the OOB area, but we want to get all the ECC bytes information, so we should call mtd_ooblayout_ecc(mtd, section++, &oobregion) until it returns -ERANGE. 
Fixes: c2b78452a9db ("mtd: use mtd_ooblayout_xxx() helpers where appropriate") Cc: Signed-off-by: OuYang ZhiZhong Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/mtdchar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 2a47a3f0e730..b4092eab53ac 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -487,7 +487,7 @@ static int shrink_ecclayout(struct mtd_info *mtd, for (i = 0; i < MTD_MAX_ECCPOS_ENTRIES;) { u32 eccpos; - ret = mtd_ooblayout_ecc(mtd, section, &oobregion); + ret = mtd_ooblayout_ecc(mtd, section++, &oobregion); if (ret < 0) { if (ret != -ERANGE) return ret; @@ -534,7 +534,7 @@ static int get_oobinfo(struct mtd_info *mtd, struct nand_oobinfo *to) for (i = 0; i < ARRAY_SIZE(to->eccpos);) { u32 eccpos; - ret = mtd_ooblayout_ecc(mtd, section, &oobregion); + ret = mtd_ooblayout_ecc(mtd, section++, &oobregion); if (ret < 0) { if (ret != -ERANGE) return ret; From 9b5dd849509b3df53d0b98eac79d86cdeb54ef2c Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Wed, 21 Mar 2018 04:31:36 +0530 Subject: [PATCH 39/68] mtd: nand: fsl_ifc: Fix nand waitfunc return value commit fa8e6d58c5bc260f4369c6699683d69695daed0a upstream. As per the IFC hardware manual, Most significant 2 bytes in nand_fsr register are the outcome of NAND READ STATUS command. So status value need to be shifted and aligned as per the nand framework requirement. 
Fixes: 82771882d960 ("NAND Machine support for Integrated Flash Controller") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Jagdish Gediya Reviewed-by: Prabhakar Kushwaha Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index f8f12ccc6471..a4d78c57b1e1 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -656,6 +656,7 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; u32 nand_fsr; + int status; /* Use READ_STATUS command, but wait for the device to be ready */ ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | @@ -670,12 +671,12 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) fsl_ifc_run_command(mtd); nand_fsr = ifc_in32(&ifc->ifc_nand.nand_fsr); - + status = nand_fsr >> 24; /* * The chip always seems to report that it is * write-protected, even when it is not. */ - return nand_fsr | NAND_STATUS_WP; + return status | NAND_STATUS_WP; } static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip, From eca95cb6b47a4190d3cf110d51ba186eb40e3a44 Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Wed, 21 Mar 2018 05:51:46 +0530 Subject: [PATCH 40/68] mtd: nand: fsl_ifc: Fix eccstat array overflow for IFC ver >= 2.0.0 commit 843c3a59997f18060848b8632607dd04781b52d1 upstream. The number of ECC status registers (ECCSTATx) has been increased in IFC version 2.0.0 due to the increase in SRAM size. This causes the eccstat array to overflow. So, replace the eccstat array with a u32 variable to make it fail-safe and independent of the number of ECC status registers or the SRAM size.
Fixes: bccb06c353af ("mtd: nand: ifc: update bufnum mask for ver >= 2.0.0") Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jagdish Gediya Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index a4d78c57b1e1..97e6f37a742e 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -201,14 +201,9 @@ static int is_blank(struct mtd_info *mtd, unsigned int bufnum) /* returns nonzero if entire page is blank */ static int check_read_ecc(struct mtd_info *mtd, struct fsl_ifc_ctrl *ctrl, - u32 *eccstat, unsigned int bufnum) + u32 eccstat, unsigned int bufnum) { - u32 reg = eccstat[bufnum / 4]; - int errors; - - errors = (reg >> ((3 - bufnum % 4) * 8)) & 15; - - return errors; + return (eccstat >> ((3 - bufnum % 4) * 8)) & 15; } /* @@ -221,7 +216,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl; struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs; - u32 eccstat[4]; + u32 eccstat; int i; /* set the chip select for NAND Transaction */ @@ -256,8 +251,8 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) if (nctrl->eccread) { int errors; int bufnum = nctrl->page & priv->bufnum_mask; - int sector = bufnum * chip->ecc.steps; - int sector_end = sector + chip->ecc.steps - 1; + int sector_start = bufnum * chip->ecc.steps; + int sector_end = sector_start + chip->ecc.steps - 1; __be32 *eccstat_regs; if (ctrl->version >= FSL_IFC_VERSION_2_0_0) @@ -265,10 +260,12 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) else eccstat_regs = ifc->ifc_nand.v1_nand_eccstat; - for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = ifc_in32(&eccstat_regs[i]); + eccstat = ifc_in32(&eccstat_regs[sector_start / 4]); + + for 
(i = sector_start; i <= sector_end; i++) { + if (i != sector_start && !(i % 4)) + eccstat = ifc_in32(&eccstat_regs[i / 4]); - for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); if (errors == 15) { From 4d9ed68855f0844af153d70424e9e6160fe6f61c Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Thu, 22 Mar 2018 01:08:10 +0530 Subject: [PATCH 41/68] mtd: nand: fsl_ifc: Read ECCSTAT0 and ECCSTAT1 registers for IFC 2.0 commit 6b00c35138b404be98b85f4a703be594cbed501c upstream. Due to missing information in Hardware manual, current implementation doesn't read ECCSTAT0 and ECCSTAT1 registers for IFC 2.0. Add support to read ECCSTAT0 and ECCSTAT1 registers during ecccheck for IFC 2.0. Fixes: 656441478ed5 ("mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Jagdish Gediya Reviewed-by: Prabhakar Kushwaha Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/fsl_ifc_nand.c | 6 +----- include/linux/fsl_ifc.h | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 97e6f37a742e..2f6b55229d5b 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -255,11 +255,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int sector_end = sector_start + chip->ecc.steps - 1; __be32 *eccstat_regs; - if (ctrl->version >= FSL_IFC_VERSION_2_0_0) - eccstat_regs = ifc->ifc_nand.v2_nand_eccstat; - else - eccstat_regs = ifc->ifc_nand.v1_nand_eccstat; - + eccstat_regs = ifc->ifc_nand.nand_eccstat; eccstat = ifc_in32(&eccstat_regs[sector_start / 4]); for (i = sector_start; i <= sector_end; i++) { diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index c332f0a45607..3fdfede2f0f3 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -734,11 +734,7 @@ struct fsl_ifc_nand { u32 res19[0x10]; __be32 nand_fsr; u32 res20; - 
/* The V1 nand_eccstat is actually 4 words that overlaps the - * V2 nand_eccstat. - */ - __be32 v1_nand_eccstat[2]; - __be32 v2_nand_eccstat[6]; + __be32 nand_eccstat[8]; u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; From 5e7124c4d6786488198b192f90491e5a5ba51230 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Mar 2018 14:07:45 +0300 Subject: [PATCH 42/68] staging: ncpfs: memory corruption in ncp_read_kernel() commit 4c41aa24baa4ed338241d05494f2c595c885af8f upstream. If the server is malicious then *bytes_read could be larger than the size of the "target" buffer. It would lead to memory corruption when we do the memcpy(). Reported-by: Dr Silvio Cesare of InfoSect Signed-off-by: Dan Carpenter Cc: stable Signed-off-by: Greg Kroah-Hartman --- fs/ncpfs/ncplib_kernel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 88dbbc9fcf4d..f571570a2e72 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -980,6 +980,10 @@ ncp_read_kernel(struct ncp_server *server, const char *file_id, goto out; } *bytes_read = ncp_reply_be16(server, 0); + if (*bytes_read > to_read) { + result = -EINVAL; + goto out; + } source = ncp_reply_data(server, 2 + (offset & 1)); memcpy(target, source, *bytes_read); From a452b356c58cdec58ff3a628be725755b0715540 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 1 Mar 2018 19:34:00 +0100 Subject: [PATCH 43/68] can: ifi: Repair the error handling commit 880dd464b4304583c557c4e5f5ecebfd55d232b1 upstream. The new version of the IFI CANFD core has significantly less complex error state indication logic. In particular, the warning/error state bits are no longer all over the place, but are all present in the STATUS register. Moreover, there is a new IRQ register bit indicating transition between error states (active/warning/passive/busoff). 
This patch makes use of this bit to weed out the obscure selective INTERRUPT register clearing, which was used to carry over the error state indication into the poll function. While at it, this patch fixes the handling of the ACTIVE state, since the hardware provides indication of the core being in ACTIVE state and that in turn fixes the state transition indication toward userspace. Finally, register reads in the poll function are moved to the matching subfunctions since those are also no longer needed in the poll function. Signed-off-by: Marek Vasut Cc: Heiko Schocher Cc: Markus Marb Cc: Marc Kleine-Budde Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/ifi_canfd/ifi_canfd.c | 64 ++++++++++++++++----------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index c06ef438f23f..acfa7bfb98af 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -30,6 +30,7 @@ #define IFI_CANFD_STCMD_ERROR_ACTIVE BIT(2) #define IFI_CANFD_STCMD_ERROR_PASSIVE BIT(3) #define IFI_CANFD_STCMD_BUSOFF BIT(4) +#define IFI_CANFD_STCMD_ERROR_WARNING BIT(5) #define IFI_CANFD_STCMD_BUSMONITOR BIT(16) #define IFI_CANFD_STCMD_LOOPBACK BIT(18) #define IFI_CANFD_STCMD_DISABLE_CANFD BIT(24) @@ -52,7 +53,10 @@ #define IFI_CANFD_TXSTCMD_OVERFLOW BIT(13) #define IFI_CANFD_INTERRUPT 0xc +#define IFI_CANFD_INTERRUPT_ERROR_BUSOFF BIT(0) #define IFI_CANFD_INTERRUPT_ERROR_WARNING BIT(1) +#define IFI_CANFD_INTERRUPT_ERROR_STATE_CHG BIT(2) +#define IFI_CANFD_INTERRUPT_ERROR_REC_TEC_INC BIT(3) #define IFI_CANFD_INTERRUPT_ERROR_COUNTER BIT(10) #define IFI_CANFD_INTERRUPT_TXFIFO_EMPTY BIT(16) #define IFI_CANFD_INTERRUPT_TXFIFO_REMOVE BIT(22) @@ -61,6 +65,10 @@ #define IFI_CANFD_INTERRUPT_SET_IRQ ((u32)BIT(31)) #define IFI_CANFD_IRQMASK 0x10 +#define IFI_CANFD_IRQMASK_ERROR_BUSOFF BIT(0) +#define IFI_CANFD_IRQMASK_ERROR_WARNING 
BIT(1) +#define IFI_CANFD_IRQMASK_ERROR_STATE_CHG BIT(2) +#define IFI_CANFD_IRQMASK_ERROR_REC_TEC_INC BIT(3) #define IFI_CANFD_IRQMASK_SET_ERR BIT(7) #define IFI_CANFD_IRQMASK_SET_TS BIT(15) #define IFI_CANFD_IRQMASK_TXFIFO_EMPTY BIT(16) @@ -220,7 +228,10 @@ static void ifi_canfd_irq_enable(struct net_device *ndev, bool enable) if (enable) { enirq = IFI_CANFD_IRQMASK_TXFIFO_EMPTY | - IFI_CANFD_IRQMASK_RXFIFO_NEMPTY; + IFI_CANFD_IRQMASK_RXFIFO_NEMPTY | + IFI_CANFD_IRQMASK_ERROR_STATE_CHG | + IFI_CANFD_IRQMASK_ERROR_WARNING | + IFI_CANFD_IRQMASK_ERROR_BUSOFF; if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) enirq |= IFI_CANFD_INTERRUPT_ERROR_COUNTER; } @@ -361,12 +372,13 @@ static int ifi_canfd_handle_lost_msg(struct net_device *ndev) return 1; } -static int ifi_canfd_handle_lec_err(struct net_device *ndev, const u32 errctr) +static int ifi_canfd_handle_lec_err(struct net_device *ndev) { struct ifi_canfd_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; + u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); const u32 errmask = IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST | IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST | IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST | @@ -449,6 +461,11 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, switch (new_state) { case CAN_STATE_ERROR_ACTIVE: + /* error active state */ + priv->can.can_stats.error_warning++; + priv->can.state = CAN_STATE_ERROR_ACTIVE; + break; + case CAN_STATE_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; priv->can.state = CAN_STATE_ERROR_WARNING; @@ -477,7 +494,7 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, ifi_canfd_get_berr_counter(ndev, &bec); switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (bec.txerr > bec.rxerr) ? 
@@ -510,22 +527,21 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev, return 1; } -static int ifi_canfd_handle_state_errors(struct net_device *ndev, u32 stcmd) +static int ifi_canfd_handle_state_errors(struct net_device *ndev) { struct ifi_canfd_priv *priv = netdev_priv(ndev); + u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); int work_done = 0; - u32 isr; - /* - * The ErrWarn condition is a little special, since the bit is - * located in the INTERRUPT register instead of STCMD register. - */ - isr = readl(priv->base + IFI_CANFD_INTERRUPT); - if ((isr & IFI_CANFD_INTERRUPT_ERROR_WARNING) && + if ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) && + (priv->can.state != CAN_STATE_ERROR_ACTIVE)) { + netdev_dbg(ndev, "Error, entered active state\n"); + work_done += ifi_canfd_handle_state_change(ndev, + CAN_STATE_ERROR_ACTIVE); + } + + if ((stcmd & IFI_CANFD_STCMD_ERROR_WARNING) && (priv->can.state != CAN_STATE_ERROR_WARNING)) { - /* Clear the interrupt */ - writel(IFI_CANFD_INTERRUPT_ERROR_WARNING, - priv->base + IFI_CANFD_INTERRUPT); netdev_dbg(ndev, "Error, entered warning state\n"); work_done += ifi_canfd_handle_state_change(ndev, CAN_STATE_ERROR_WARNING); @@ -552,18 +568,11 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) { struct net_device *ndev = napi->dev; struct ifi_canfd_priv *priv = netdev_priv(ndev); - const u32 stcmd_state_mask = IFI_CANFD_STCMD_ERROR_PASSIVE | - IFI_CANFD_STCMD_BUSOFF; + u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD); int work_done = 0; - u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); - u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD); - u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); - /* Handle bus state changes */ - if ((stcmd & stcmd_state_mask) || - ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) == 0)) - work_done += ifi_canfd_handle_state_errors(ndev, stcmd); + work_done += ifi_canfd_handle_state_errors(ndev); /* Handle lost messages on RX */ if (rxstcmd & IFI_CANFD_RXSTCMD_OVERFLOW) @@ -571,7 
+580,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) /* Handle lec errors on the bus */ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) - work_done += ifi_canfd_handle_lec_err(ndev, errctr); + work_done += ifi_canfd_handle_lec_err(ndev); /* Handle normal messages on RX */ if (!(rxstcmd & IFI_CANFD_RXSTCMD_EMPTY)) @@ -592,12 +601,13 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id) struct net_device_stats *stats = &ndev->stats; const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY | IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER | + IFI_CANFD_INTERRUPT_ERROR_COUNTER | + IFI_CANFD_INTERRUPT_ERROR_STATE_CHG | IFI_CANFD_INTERRUPT_ERROR_WARNING | - IFI_CANFD_INTERRUPT_ERROR_COUNTER; + IFI_CANFD_INTERRUPT_ERROR_BUSOFF; const u32 tx_irq_mask = IFI_CANFD_INTERRUPT_TXFIFO_EMPTY | IFI_CANFD_INTERRUPT_TXFIFO_REMOVE; - const u32 clr_irq_mask = ~((u32)(IFI_CANFD_INTERRUPT_SET_IRQ | - IFI_CANFD_INTERRUPT_ERROR_WARNING)); + const u32 clr_irq_mask = ~((u32)IFI_CANFD_INTERRUPT_SET_IRQ); u32 isr; isr = readl(priv->base + IFI_CANFD_INTERRUPT); From 6e400b460a92fd662548edd23d819fd5cc11b208 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 5 Mar 2018 21:29:52 +0100 Subject: [PATCH 44/68] can: ifi: Check core revision upon probe commit 591d65d5b15496af8d05e252bc1da611c66c0b79 upstream. Older versions of the core are not compatible with the driver due to various intrusive fixes of the core. Read out the VER register, check the core revision bitfield and verify if the core in use is new enough (rev 2.1 or newer) to work correctly with this driver. 
Signed-off-by: Marek Vasut Cc: Heiko Schocher Cc: Markus Marb Cc: Marc Kleine-Budde Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/ifi_canfd/ifi_canfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index acfa7bfb98af..6c676403f823 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -144,6 +144,8 @@ #define IFI_CANFD_SYSCLOCK 0x50 #define IFI_CANFD_VER 0x54 +#define IFI_CANFD_VER_REV_MASK 0xff +#define IFI_CANFD_VER_REV_MIN_SUPPORTED 0x15 #define IFI_CANFD_IP_ID 0x58 #define IFI_CANFD_IP_ID_VALUE 0xD073CAFD @@ -943,7 +945,7 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr; int irq, ret; - u32 id; + u32 id, rev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); addr = devm_ioremap_resource(dev, res); @@ -957,6 +959,13 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev) return -EINVAL; } + rev = readl(addr + IFI_CANFD_VER) & IFI_CANFD_VER_REV_MASK; + if (rev < IFI_CANFD_VER_REV_MIN_SUPPORTED) { + dev_err(dev, "This block is too old (rev %i), minimum supported is rev %i\n", + rev, IFI_CANFD_VER_REV_MIN_SUPPORTED); + return -EINVAL; + } + ndev = alloc_candev(sizeof(*priv), 1); if (!ndev) return -ENOMEM; From 01a303d27a9c5fc80d7bdf2f2961e9ad14b608dc Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 14 Mar 2018 11:52:56 +0000 Subject: [PATCH 45/68] can: cc770: Fix stalls on rt-linux, remove redundant IRQ ack commit f4353daf4905c0099fd25fa742e2ffd4a4bab26a upstream. This has been reported to cause stalls on rt-linux. 
Suggested-by: Richard Weinberger Tested-by: Richard Weinberger Signed-off-by: Andri Yngvason Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/cc770/cc770.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 1e37313054f3..9fed163262e0 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -447,15 +447,6 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += dlc; - - /* - * HM: We had some cases of repeated IRQs so make sure the - * INT is acknowledged I know it's already further up, but - * doing again fixed the issue - */ - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES); - return NETDEV_TX_OK; } @@ -684,12 +675,6 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) /* Nothing more to send, switch off interrupts */ cc770_write_reg(priv, msgobj[mo].ctrl0, MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); - /* - * We had some cases of repeated IRQ so make sure the - * INT is acknowledged - */ - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES); stats->tx_packets++; can_get_echo_skb(dev, 0); From 5fdbcc3d6db6f17ec43ef200353208a3a9cf7234 Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 14 Mar 2018 11:52:57 +0000 Subject: [PATCH 46/68] can: cc770: Fix queue stall & dropped RTR reply commit 746201235b3f876792099079f4c6fea941d76183 upstream. While waiting for the TX object to send an RTR, an external message with a matching id can overwrite the TX data. In this case we must call the rx routine and then try transmitting the message that was overwritten again. 
The queue was being stalled because the RX event did not generate an interrupt to wake up the queue again and the TX event did not happen because the TXRQST flag is reset by the chip when new data is received. According to the CC770 datasheet the id of a message object should not be changed while the MSGVAL bit is set. This has been fixed by resetting the MSGVAL bit before modifying the object in the transmit function and setting it after. It is not enough to set & reset CPUUPD. It is important to keep the MSGVAL bit reset while the message object is being modified. Otherwise, during RTR transmission, a frame with matching id could trigger an rx-interrupt, which would cause a race condition between the interrupt routine and the transmit function. Signed-off-by: Andri Yngvason Tested-by: Richard Weinberger Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/cc770/cc770.c | 94 ++++++++++++++++++++++++----------- drivers/net/can/cc770/cc770.h | 2 + 2 files changed, 68 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 9fed163262e0..2743d82d4424 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -390,37 +390,23 @@ static int cc770_get_berr_counter(const struct net_device *dev, return 0; } -static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) +static void cc770_tx(struct net_device *dev, int mo) { struct cc770_priv *priv = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct can_frame *cf = (struct can_frame *)skb->data; - unsigned int mo = obj2msgobj(CC770_OBJ_TX); + struct can_frame *cf = (struct can_frame *)priv->tx_skb->data; u8 dlc, rtr; u32 id; int i; - if (can_dropped_invalid_skb(dev, skb)) - return NETDEV_TX_OK; - - if ((cc770_read_reg(priv, - msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) { - netdev_err(dev, "TX register is still occupied!\n"); - return NETDEV_TX_BUSY; - 
} - - netif_stop_queue(dev); - dlc = cf->can_dlc; id = cf->can_id; - if (cf->can_id & CAN_RTR_FLAG) - rtr = 0; - else - rtr = MSGCFG_DIR; + rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR; + + cc770_write_reg(priv, msgobj[mo].ctrl0, + MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); cc770_write_reg(priv, msgobj[mo].ctrl1, RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES); - cc770_write_reg(priv, msgobj[mo].ctrl0, - MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES); + if (id & CAN_EFF_FLAG) { id &= CAN_EFF_MASK; cc770_write_reg(priv, msgobj[mo].config, @@ -439,13 +425,30 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < dlc; i++) cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]); - /* Store echo skb before starting the transfer */ - can_put_echo_skb(skb, dev, 0); - cc770_write_reg(priv, msgobj[mo].ctrl1, - RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC); + RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC); + cc770_write_reg(priv, msgobj[mo].ctrl0, + MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC); +} - stats->tx_bytes += dlc; +static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct cc770_priv *priv = netdev_priv(dev); + unsigned int mo = obj2msgobj(CC770_OBJ_TX); + + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + + netif_stop_queue(dev); + + if ((cc770_read_reg(priv, + msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) { + netdev_err(dev, "TX register is still occupied!\n"); + return NETDEV_TX_BUSY; + } + + priv->tx_skb = skb; + cc770_tx(dev, mo); return NETDEV_TX_OK; } @@ -671,13 +674,47 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) struct cc770_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; unsigned int mo = obj2msgobj(o); + struct can_frame *cf; + u8 ctrl1; + + ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1); - /* Nothing more to send, switch off interrupts */ cc770_write_reg(priv, msgobj[mo].ctrl0, 
MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES); + cc770_write_reg(priv, msgobj[mo].ctrl1, + RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES); - stats->tx_packets++; + if (unlikely(!priv->tx_skb)) { + netdev_err(dev, "missing tx skb in tx interrupt\n"); + return; + } + + if (unlikely(ctrl1 & MSGLST_SET)) { + stats->rx_over_errors++; + stats->rx_errors++; + } + + /* When the CC770 is sending an RTR message and it receives a regular + * message that matches the id of the RTR message, it will overwrite the + * outgoing message in the TX register. When this happens we must + * process the received message and try to transmit the outgoing skb + * again. + */ + if (unlikely(ctrl1 & NEWDAT_SET)) { + cc770_rx(dev, mo, ctrl1); + cc770_tx(dev, mo); + return; + } + + can_put_echo_skb(priv->tx_skb, dev, 0); can_get_echo_skb(dev, 0); + + cf = (struct can_frame *)priv->tx_skb->data; + stats->tx_bytes += cf->can_dlc; + stats->tx_packets++; + + priv->tx_skb = NULL; + netif_wake_queue(dev); } @@ -789,6 +826,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv) priv->can.do_set_bittiming = cc770_set_bittiming; priv->can.do_set_mode = cc770_set_mode; priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; + priv->tx_skb = NULL; memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags)); diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h index a1739db98d91..95752e1d1283 100644 --- a/drivers/net/can/cc770/cc770.h +++ b/drivers/net/can/cc770/cc770.h @@ -193,6 +193,8 @@ struct cc770_priv { u8 cpu_interface; /* CPU interface register */ u8 clkout; /* Clock out register */ u8 bus_config; /* Bus conffiguration register */ + + struct sk_buff *tx_skb; }; struct net_device *alloc_cc770dev(int sizeof_priv); From 101a72edd98d2f87d112f21f81f796f584f1bd58 Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Thu, 15 Mar 2018 18:23:17 +0000 Subject: [PATCH 47/68] can: cc770: Fix use after free in cc770_tx_interrupt() commit 9ffd7503944ec7c0ef41c3245d1306c221aef2be 
upstream. This fixes use after free introduced by the last cc770 patch. Signed-off-by: Andri Yngvason Fixes: 746201235b3f ("can: cc770: Fix queue stall & dropped RTR reply") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/cc770/cc770.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 2743d82d4424..6da69af103e6 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -706,13 +706,12 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) return; } - can_put_echo_skb(priv->tx_skb, dev, 0); - can_get_echo_skb(dev, 0); - cf = (struct can_frame *)priv->tx_skb->data; stats->tx_bytes += cf->can_dlc; stats->tx_packets++; + can_put_echo_skb(priv->tx_skb, dev, 0); + can_get_echo_skb(dev, 0); priv->tx_skb = NULL; netif_wake_queue(dev); From 7c28067736a24a19e0d646fea510357228e95910 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Mar 2018 10:43:26 +0100 Subject: [PATCH 48/68] tty: vt: fix up tabstops properly commit f1869a890cdedb92a3fab969db5d0fd982850273 upstream. Tabs on a console with long lines do not wrap properly, so correctly account for the line length when computing the tab placement location. 
Reported-by: James Holderness Signed-off-by: Greg Kroah-Hartman Cc: stable Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ce2c3c6349d4..68c7bb0b7991 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1727,7 +1727,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) default_attr(vc); update_attr(vc); - vc->vc_tab_stop[0] = 0x01010100; + vc->vc_tab_stop[0] = vc->vc_tab_stop[1] = vc->vc_tab_stop[2] = vc->vc_tab_stop[3] = @@ -1771,7 +1771,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_pos -= (vc->vc_x << 1); while (vc->vc_x < vc->vc_cols - 1) { vc->vc_x++; - if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31))) + if (vc->vc_tab_stop[7 & (vc->vc_x >> 5)] & (1 << (vc->vc_x & 31))) break; } vc->vc_pos += (vc->vc_x << 1); @@ -1831,7 +1831,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) lf(vc); return; case 'H': - vc->vc_tab_stop[vc->vc_x >> 5] |= (1 << (vc->vc_x & 31)); + vc->vc_tab_stop[7 & (vc->vc_x >> 5)] |= (1 << (vc->vc_x & 31)); return; case 'Z': respond_ID(tty); @@ -2024,7 +2024,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) return; case 'g': if (!vc->vc_par[0]) - vc->vc_tab_stop[vc->vc_x >> 5] &= ~(1 << (vc->vc_x & 31)); + vc->vc_tab_stop[7 & (vc->vc_x >> 5)] &= ~(1 << (vc->vc_x & 31)); else if (vc->vc_par[0] == 3) { vc->vc_tab_stop[0] = vc->vc_tab_stop[1] = From c68a7a87e1e6553b7f65ad35acdaf31c84fc5b16 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 17 Mar 2018 08:25:07 -0700 Subject: [PATCH 49/68] selftests/x86/ptrace_syscall: Fix for yet more glibc interference commit 4b0b37d4cc54b21a6ecad7271cbc850555869c62 upstream. glibc keeps getting cleverer, and my version now turns raise() into more than one syscall. 
Since the test relies on ptrace seeing an exact set of syscalls, this breaks the test. Replace raise(SIGSTOP) with syscall(SYS_tgkill, ...) to force glibc to get out of our way. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/bc80338b453afa187bc5f895bd8e2c8d6e264da2.1521300271.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/ptrace_syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index eaea92439708..1e3da137a8bb 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -182,8 +182,10 @@ static void test_ptrace_syscall_restart(void) if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) err(1, "PTRACE_TRACEME"); + pid_t pid = getpid(), tid = syscall(SYS_gettid); + printf("\tChild will make one syscall\n"); - raise(SIGSTOP); + syscall(SYS_tgkill, pid, tid, SIGSTOP); syscall(SYS_gettid, 10, 11, 12, 13, 14, 15); _exit(0); @@ -300,9 +302,11 @@ static void test_restart_under_ptrace(void) if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) err(1, "PTRACE_TRACEME"); + pid_t pid = getpid(), tid = syscall(SYS_gettid); + printf("\tChild will take a nap until signaled\n"); setsigign(SIGUSR1, SA_RESTART); - raise(SIGSTOP); + syscall(SYS_tgkill, pid, tid, SIGSTOP); syscall(SYS_pause, 0, 0, 0, 0, 0, 0); _exit(0); From 587da2b6282302325bd4bcc28a7615e822184ff1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 20 Mar 2018 12:16:59 -0700 Subject: [PATCH 50/68] kvm/x86: fix icebp instruction handling commit 32d43cd391bacb5f0814c2624399a5dad3501d09 upstream. 
The undocumented 'icebp' instruction (aka 'int1') works pretty much like 'int3' in the absence of in-circuit probing equipment (except, obviously, that it raises #DB instead of raising #BP), and is used by some validation test-suites as such. But Andy Lutomirski noticed that his test suite acted differently in kvm than on bare hardware. The reason is that kvm used an inexact test for the icebp instruction: it just assumed that an all-zero VM exit qualification value meant that the VM exit was due to icebp. That is not unlike the guess that do_debug() does for the actual exception handling case, but it's purely a heuristic, not an absolute rule. do_debug() does it because it wants to ascribe _some_ reasons to the #DB that happened, and an empty %dr6 value means that 'icebp' is the most likely cause and we have no better information. But kvm can just do it right, because unlike the do_debug() case, kvm actually sees the real reason for the #DB in the VM-exit interruption information field. So instead of relying on an inexact heuristic, just use the actual VM exit information that says "it was 'icebp'". Right now the 'icebp' instruction isn't technically documented by Intel, but that will hopefully change. The special "privileged software exception" information _is_ actually mentioned in the Intel SDM, even though the cause of it isn't enumerated.
Reported-by: Andy Lutomirski Tested-by: Paolo Bonzini Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 6899cf187ba2..9cbfbef6a115 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -309,6 +309,7 @@ enum vmcs_field { #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0f3bb4632310..7ed422e2641b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1053,6 +1053,13 @@ static inline bool is_machine_check(u32 intr_info) (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); } +/* Undocumented: icebp/int1 */ +static inline bool is_icebp(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) + == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); +} + static inline bool cpu_has_vmx_msr_bitmap(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; @@ -5733,7 +5740,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (!(dr6 & ~DR6_RESERVED)) /* icebp */ + if (is_icebp(intr_info)) skip_emulated_instruction(vcpu); kvm_queue_exception(vcpu, DB_VECTOR); From 678b405bff8be2ccfc1d3b86ac9119049c1ea718 Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" Date: Mon, 19 Mar 2018 13:57:46 -0700 Subject: [PATCH 51/68] x86/build/64: Force the linker to use 2MB page size commit e3d03598e8ae7d195af5d3d049596dec336f569f upstream. Binutils 2.31 will enable -z separate-code by default for x86 to avoid mixing code pages with data to improve cache performance as well as security. To reduce x86-64 executable and shared object sizes, the maximum page size is reduced from 2MB to 4KB. But x86-64 kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to linker to force 2MB page size regardless of the default page size used by linker. Tested with Linux kernel 4.15.6 on x86-64. Signed-off-by: H.J. Lu Cc: Andy Shevchenko Cc: Eric Biederman Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAMe9rOp4_%3D_8twdpTyAP2DhONOCeaTOsniJLoppzhoNptL8xzA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b60996184fa4..f408babdf746 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -172,6 +172,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) LDFLAGS := -m elf_$(UTS_MACHINE) +# +# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to +# the linker to force 2MB page size regardless of the default page size used +# by the linker. +# +ifdef CONFIG_X86_64 +LDFLAGS += $(call ld-option, -z max-page-size=0x200000) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release From f9ed24457265f2dac9fa80a20d8d546b201b5540 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 19 Mar 2018 14:08:11 -0700 Subject: [PATCH 52/68] x86/boot/64: Verify alignment of the LOAD segment commit c55b8550fa57ba4f5e507be406ff9fc2845713e8 upstream. 
Since the x86-64 kernel must be aligned to 2MB, refuse to boot the kernel if the alignment of the LOAD segment isn't a multiple of 2MB. Signed-off-by: H.J. Lu Cc: Andy Shevchenko Cc: Eric Biederman Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAMe9rOrR7xSJgUfiCoZLuqWUwymRxXPoGBW38%2BpN%3D9g%2ByKNhZw@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c945acd8fa33..d86e68d3c794 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -299,6 +299,10 @@ static void parse_elf(void *output) switch (phdr->p_type) { case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif #ifdef CONFIG_RELOCATABLE dest = output; dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); From 3681c24a7d096b092cf05c8338adbb9019bb1536 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 23 Jul 2015 15:37:48 -0700 Subject: [PATCH 53/68] x86/entry/64: Don't use IST entry for #BP stack commit d8ba61ba58c88d5207c1ba2f7d9a2280e7d03be9 upstream. There's nothing IST-worthy about #BP/int3. We don't allow kprobes in the small handful of places in the kernel that run at CPL0 with an invalid stack, and 32-bit kernels have used normal interrupt gates for #BP forever. Furthermore, we don't allow kprobes in places that have usergs while in kernel mode, so "paranoid" is also unnecessary. 
Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/traps.c | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 58610fe93f5d..d58d8dcb8245 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -943,7 +943,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 322f433fbc76..f2142932ff0b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -526,7 +526,6 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -541,7 +540,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; + /* + * Use ist_enter despite the fact that we don't use an IST stack. + * We can be called from a kprobe in non-CONTEXT_KERNEL kernel + * mode or even during context tracking state changes. + * + * This means that we can't schedule. That's okay. 
+ */ ist_enter(regs); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, @@ -558,17 +565,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) goto exit; - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); preempt_disable(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); preempt_enable_no_resched(); - debug_stack_usage_dec(); exit: ist_exit(regs); } @@ -989,19 +990,16 @@ void __init trap_init(void) cpu_init(); /* - * X86_TRAP_DB and X86_TRAP_BP have been set - * in early_trap_init(). However, ITS works only after - * cpu_init() loads TSS. See comments in early_trap_init(). + * X86_TRAP_DB was installed in early_trap_init(). However, + * IST works only after cpu_init() loads TSS. See comments + * in early_trap_init(). */ set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); x86_init.irqs.trap_init(); #ifdef CONFIG_X86_64 memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); set_nmi_gate(X86_TRAP_DB, &debug); - set_nmi_gate(X86_TRAP_BP, &int3); #endif } From b3076abb678614363985d58a4b839148a47f5e82 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Mar 2018 07:22:30 -0800 Subject: [PATCH 54/68] perf/x86/intel/uncore: Fix Skylake UPI event format commit 317660940fd9dddd3201c2f92e25c27902c753fa upstream. There is no event extension (bit 21) for SKX UPI, so use 'event' instead of 'event_ext'. 
Reported-by: Stephane Eranian Signed-off-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1520004150-4855-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index afe8024e9e95..9c245dbc7d65 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3566,7 +3566,7 @@ static struct intel_uncore_type skx_uncore_imc = { }; static struct attribute *skx_upi_uncore_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_umask_ext.attr, &format_attr_edge.attr, &format_attr_inv.attr, From a8b3a6a4ae5e7b790cdc62335a6f5952c3f9b6ea Mon Sep 17 00:00:00 2001 From: Ilya Pronin Date: Mon, 5 Mar 2018 22:43:53 -0800 Subject: [PATCH 55/68] perf stat: Fix CVS output format for non-supported counters commit 40c21898ba5372c14ef71717040529794a91ccc2 upstream. When printing stats in CSV mode, 'perf stat' appends extra separators when a counter is not supported: ,,L1-dcache-store-misses,mesos/bd442f34-2b4a-47df-b966-9b281f9f56fc,0,100.00,,,, Which causes a failure when parsing fields. The numbers of separators should be the same for each line, no matter if the counter is or not supported. 
Signed-off-by: Ilya Pronin Acked-by: Jiri Olsa Cc: Andi Kleen Link: http://lkml.kernel.org/r/20180306064353.31930-1-xiyou.wangcong@gmail.com Fixes: 92a61f6412d3 ("perf stat: Implement CSV metrics output") Signed-off-by: Cong Wang Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5b60ec669e73..68861e81f06c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -876,7 +876,7 @@ static void print_metric_csv(void *ctx, char buf[64], *vals, *ends; if (unit == NULL || fmt == NULL) { - fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + fprintf(out, "%s%s", csv_sep, csv_sep); return; } snprintf(buf, sizeof(buf), fmt, val); From e91ec3494168e1d4505f413effe0dfbbbcc64208 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 17 Mar 2018 14:52:16 +0300 Subject: [PATCH 56/68] perf/x86/intel: Don't accidentally clear high bits in bdw_limit_period() commit e5ea9b54a055619160bbfe527ebb7d7191823d66 upstream. We intended to clear the lowest 6 bits but because of a type bug we clear the high 32 bits as well. Andi says that periods are rarely more than U32_MAX so this bug probably doesn't have a huge runtime impact. Signed-off-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 294fe0f52a44 ("perf/x86/intel: Add INST_RETIRED.ALL workarounds") Link: http://lkml.kernel.org/r/20180317115216.GB4035@mwanda Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0bd0c1cc3228..6f353a874178 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3025,7 +3025,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left) X86_CONFIG(.event=0xc0, .umask=0x01)) { if (left < 128) left = 128; - left &= ~0x3fu; + left &= ~0x3fULL; } return left; } From 9c0d0a0c79ee7e1c0834594a5a8f4e701067828f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Mar 2018 11:51:34 -0700 Subject: [PATCH 57/68] perf/x86/intel/uncore: Fix multi-domain PCI CHA enumeration bug on Skylake servers commit 320b0651f32b830add6497fcdcfdcb6ae8c7b8a0 upstream. The number of CHAs is miscalculated on multi-domain PCI Skylake server systems, resulting in an uncore driver initialization error. Gary Kroening explains: "For systems with a single PCI segment, it is sufficient to look for the bus number to change in order to determine that all of the CHa's have been counted for a single socket. However, for multi PCI segment systems, each socket is given a new segment and the bus number does NOT change. So looking only for the bus number to change ends up counting all of the CHa's on all sockets in the system. This leads to writing CPU MSRs beyond a valid range and causes an error in ivbep_uncore_msr_init_box()." To fix this bug, query the number of CHAs from the CAPID6 register: it should read bits 27:0 in the CAPID6 register located at Device 30, Function 3, Offset 0x9C. 
These 28 bits form a bit vector of available LLC slices and the CHAs that manage those slices. Reported-by: Kroening, Gary Tested-by: Kroening, Gary Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Shevchenko Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: abanman@hpe.com Cc: dimitri.sivanich@hpe.com Cc: hpa@zytor.com Cc: mike.travis@hpe.com Cc: russ.anderson@hpe.com Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1520967094-13219-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore_snbep.c | 31 +++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 9c245dbc7d65..6bc36944a8c1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3522,24 +3522,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = { NULL, }; +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. 
+ */ +#define SKX_CAPID6 0x9c +#define SKX_CHA_BIT_MASK GENMASK(27, 0) + static int skx_count_chabox(void) { - struct pci_dev *chabox_dev = NULL; - int bus, count = 0; + struct pci_dev *dev = NULL; + u32 val = 0; - while (1) { - chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); - if (!chabox_dev) - break; - if (count == 0) - bus = chabox_dev->bus->number; - if (bus != chabox_dev->bus->number) - break; - count++; - } + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); + if (!dev) + goto out; - pci_dev_put(chabox_dev); - return count; + pci_read_config_dword(dev, SKX_CAPID6, &val); + val &= SKX_CHA_BIT_MASK; +out: + pci_dev_put(dev); + return hweight32(val); } void skx_uncore_cpu_init(void) From 162daa27140a592aa6bd524f4553f9678e1efcf8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 5 Dec 2017 11:57:27 +0100 Subject: [PATCH 58/68] iio: ABI: Fix name of timestamp sysfs file commit b9a3589332c2a25fb7edad25a26fcaada3209126 upstream. The name of the file is "current_timestamp_clock" not "timestamp_clock". Fixes: bc2b7dab629a ("iio:core: timestamping clock selection support") Cc: Gregor Boirie Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-iio | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index fee35c00cc4e..0406076e4405 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -32,7 +32,7 @@ Description: Description of the physical chip / device for device X. Typically a part number.
-What: /sys/bus/iio/devices/iio:deviceX/timestamp_clock +What: /sys/bus/iio/devices/iio:deviceX/current_timestamp_clock KernelVersion: 4.5 Contact: linux-iio@vger.kernel.org Description: From 1e0fc7dba23d8ecf8baee8ccf8440503f7013221 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 5 Sep 2017 20:25:25 +0000 Subject: [PATCH 59/68] staging: lustre: ptlrpc: kfree used instead of kvfree commit c3eec59659cf25916647d2178c541302bb4822ad upstream. rq_reqbuf is allocated using kvmalloc() but released in one occasion using kfree() instead of kvfree(). The issue was found using grep based on a similar bug. Fixes: d7e09d0397e8 ("add Lustre file system client support") Fixes: ee0ec1946ec2 ("lustre: ptlrpc: Replace uses of OBD_{ALLOC,FREE}_LARGE") Cc: Peng Tao Cc: Oleg Drokin Cc: James Simmons Signed-off-by: Nadav Amit Signed-off-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/ptlrpc/sec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index a7416cd9ac71..7b0587d8b176 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -838,7 +838,7 @@ void sptlrpc_request_out_callback(struct ptlrpc_request *req) if (req->rq_pool || !req->rq_reqbuf) return; - kfree(req->rq_reqbuf); + kvfree(req->rq_reqbuf); req->rq_reqbuf = NULL; req->rq_reqbuf_len = 0; } From 26e9852f9d50945c7a3b245143eed8f7004cdaed Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 3 Feb 2017 10:51:35 -0800 Subject: [PATCH 60/68] selftests, x86, protection_keys: fix wrong offset in siginfo commit 2195bff041486eb7fcceaf058acaedcd057efbdc upstream. The siginfo contains a bunch of information about the fault. For protection keys, it tells us which protection key's permissions were violated. The wrong offset in here leads to reading garbage and thus failures in the tests. 
We should probably eventually move this over to using the kernel's headers defining the siginfo instead of a hard-coded offset. But, for now, just do the simplest fix. Signed-off-by: Dave Hansen Cc: Ingo Molnar Cc: Shuah Khan Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 2842a5fa22b3..17c7d5833ccf 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -192,7 +192,7 @@ void lots_o_noops_around_write(int *write_to_me) #define SYS_pkey_alloc 381 #define SYS_pkey_free 382 #define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x18 +#define si_pkey_offset 0x14 #else #define SYS_mprotect_key 329 #define SYS_pkey_alloc 330 From 93b4839239e3c76a31fe1e56e19405674c9948e7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:48 -0700 Subject: [PATCH 61/68] selftests/x86/protection_keys: Fix syscall NR redefinition warnings commit 693cb5580fdb026922363aa103add64b3ecd572e upstream. On new enough glibc, the pkey syscalls numbers are available. 
Check first before defining them to avoid warnings like: protection_keys.c:198:0: warning: "SYS_pkey_alloc" redefined Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1fbef53a9e6befb7165ff855fc1a7d4788a191d6.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 17c7d5833ccf..44548de3e69b 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -188,17 +188,29 @@ void lots_o_noops_around_write(int *write_to_me) #define u64 uint64_t #ifdef __i386__ -#define SYS_mprotect_key 380 -#define SYS_pkey_alloc 381 -#define SYS_pkey_free 382 + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif #define REG_IP_IDX REG_EIP #define si_pkey_offset 0x14 + #else -#define SYS_mprotect_key 329 -#define SYS_pkey_alloc 330 -#define SYS_pkey_free 331 + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif #define REG_IP_IDX REG_RIP #define si_pkey_offset 0x20 + #endif void dump_mem(void *dumpme, int len_bytes) From f41f8156aee5b69ef45f3e54a27ea85033973199 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 26 Jun 2017 16:36:57 -0500 Subject: [PATCH 62/68] signal/testing: Don't look for __SI_FAULT in userspace commit d12fe87e62d773e81e0cb3a123c5a480a10d7d91 upstream. Fix the debug print statements in these tests where they reference si_codes and in particular __SI_FAULT. 
__SI_FAULT is a kernel internal value and should never be seen by userspace. While I am in there also fix si_code_str. si_codes are an enumeration they are not a bitmap so == and not & is the appropriate operation to test for an si_code. Cc: Dave Hansen Fixes: 5f23f6d082a9 ("x86/pkeys: Add self-tests") Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/mpx-mini-test.c | 3 +-- tools/testing/selftests/x86/protection_keys.c | 13 ++++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 79e1d13d1cda..58384189370c 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -419,8 +419,7 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define __SI_FAULT (3 << 16) -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ +#define SEGV_BNDERR 3 /* failed address bound checks */ dprintf2("Saw a #BR!
status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 44548de3e69b..d58ff87b1bb5 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -224,19 +224,18 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define __SI_FAULT (3 << 16) -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define SEGV_PKUERR (__SI_FAULT|4) +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 static char *si_code_str(int si_code) { - if (si_code & SEGV_MAPERR) + if (si_code == SEGV_MAPERR) return "SEGV_MAPERR"; - if (si_code & SEGV_ACCERR) + if (si_code == SEGV_ACCERR) return "SEGV_ACCERR"; - if (si_code & SEGV_BNDERR) + if (si_code == SEGV_BNDERR) return "SEGV_BNDERR"; - if (si_code & SEGV_PKUERR) + if (si_code == SEGV_PKUERR) return "SEGV_PKUERR"; return "UNKNOWN"; } From 1443abc90332311982e0634b6ef762ede5f98e4a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 10 Nov 2017 16:12:31 -0800 Subject: [PATCH 63/68] x86/pkeys/selftests: Rename 'si_pkey' to 'siginfo_pkey' commit 91c49c2deb96ffc3c461eaae70219d89224076b7 upstream. 'si_pkey' is now #defined to be the name of the new siginfo field that protection keys uses. Rename it not to conflict. Signed-off-by: Dave Hansen Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171111001231.DFFC8285@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index d58ff87b1bb5..85a78eba0a93 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -249,7 +249,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) unsigned long ip; char *fpregs; u32 *pkru_ptr; - u64 si_pkey; + u64 siginfo_pkey; u32 *si_pkey_ptr; int pkru_offset; fpregset_t fpregset; @@ -291,9 +291,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); dump_mem(si_pkey_ptr - 8, 24); - si_pkey = *si_pkey_ptr; - pkey_assert(si_pkey < NR_PKEYS); - last_si_pkey = si_pkey; + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -305,7 +305,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("si_pkey from siginfo: %jx\n", si_pkey); + dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkru_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; From 353f71fe3c07fabc3433d94c894c0acdeadb2a29 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Oct 2017 16:16:13 -0600 Subject: [PATCH 64/68] selftests: x86: sysret_ss_attrs doesn't build on a PIE build commit 
3346a6a4e5ba8c040360f753b26938cec31a4bdc upstream. sysret_ss_attrs fails to compile leading x86 test run to fail on systems configured to build using PIE by default. Add -no-pie fix it. Relocation might still fail if relocated above 4G. For now this change fixes the build and runs x86 tests. tools/testing/selftests/x86$ make gcc -m64 -o .../tools/testing/selftests/x86/single_step_syscall_64 -O2 -g -std=gnu99 -pthread -Wall single_step_syscall.c -lrt -ldl gcc -m64 -o .../tools/testing/selftests/x86/sysret_ss_attrs_64 -O2 -g -std=gnu99 -pthread -Wall sysret_ss_attrs.c thunks.S -lrt -ldl /usr/bin/ld: /tmp/ccS6pvIh.o: relocation R_X86_64_32S against `.text' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: final link failed: Nonrepresentable section on output collect2: error: ld returned 1 exit status Makefile:49: recipe for target '.../tools/testing/selftests/x86/sysret_ss_attrs_64' failed make: *** [.../tools/testing/selftests/x86/sysret_ss_attrs_64] Error 1 Suggested-by: Andy Lutomirski Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 6eb50152baf0..e5b459a09cff 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -17,7 +17,7 @@ TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) From 733a4e1af803b1094b26040fc50e5c821fa2f28f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Mar 2018 01:18:24 +0100 Subject: [PATCH 65/68] kbuild: disable clang's default use of -fmerge-all-constants commit 
87e0d4f0f37fb0c8c4aeeac46fff5e957738df79 upstream. Prasad reported that he has seen crashes in BPF subsystem with netd on Android with arm64 in the form of (note, the taint is unrelated): [ 4134.721483] Unable to handle kernel paging request at virtual address 800000001 [ 4134.820925] Mem abort info: [ 4134.901283] Exception class = DABT (current EL), IL = 32 bits [ 4135.016736] SET = 0, FnV = 0 [ 4135.119820] EA = 0, S1PTW = 0 [ 4135.201431] Data abort info: [ 4135.301388] ISV = 0, ISS = 0x00000021 [ 4135.359599] CM = 0, WnR = 0 [ 4135.470873] user pgtable: 4k pages, 39-bit VAs, pgd = ffffffe39b946000 [ 4135.499757] [0000000800000001] *pgd=0000000000000000, *pud=0000000000000000 [ 4135.660725] Internal error: Oops: 96000021 [#1] PREEMPT SMP [ 4135.674610] Modules linked in: [ 4135.682883] CPU: 5 PID: 1260 Comm: netd Tainted: G S W 4.14.19+ #1 [ 4135.716188] task: ffffffe39f4aa380 task.stack: ffffff801d4e0000 [ 4135.731599] PC is at bpf_prog_add+0x20/0x68 [ 4135.741746] LR is at bpf_prog_inc+0x20/0x2c [ 4135.751788] pc : [] lr : [] pstate: 60400145 [ 4135.769062] sp : ffffff801d4e3ce0 [...] [ 4136.258315] Process netd (pid: 1260, stack limit = 0xffffff801d4e0000) [ 4136.273746] Call trace: [...] [ 4136.442494] 3ca0: ffffff94ab7ad584 0000000060400145 ffffffe3a01bf8f8 0000000000000006 [ 4136.460936] 3cc0: 0000008000000000 ffffff94ab844204 ffffff801d4e3cf0 ffffff94ab7ad584 [ 4136.479241] [] bpf_prog_add+0x20/0x68 [ 4136.491767] [] bpf_prog_inc+0x20/0x2c [ 4136.504536] [] bpf_obj_get_user+0x204/0x22c [ 4136.518746] [] SyS_bpf+0x5a8/0x1a88 Android's netd was basically pinning the uid cookie BPF map in BPF fs (/sys/fs/bpf/traffic_cookie_uid_map) and later on retrieving it again resulting in above panic. Issue is that the map was wrongly identified as a prog! 
Above kernel was compiled with clang 4.0, and it turns out that clang decided to merge the bpf_prog_iops and bpf_map_iops into a single memory location, such that the two i_ops could then not be distinguished anymore. Reason for this miscompilation is that clang has the more aggressive -fmerge-all-constants enabled by default. In fact, clang source code has a comment about it in lib/AST/ExprConstant.cpp on why it is okay to do so: Pointers with different bases cannot represent the same object. (Note that clang defaults to -fmerge-all-constants, which can lead to inconsistent results for comparisons involving the address of a constant; this generally doesn't matter in practice.) The issue never appeared with gcc however, since gcc does not enable -fmerge-all-constants by default and even *explicitly* states in its option description that using this flag results in non-conforming behavior, quote from man gcc: Languages like C or C++ require each variable, including multiple instances of the same variable in recursive calls, to have distinct locations, so using this option results in non-conforming behavior. There are also various clang bug reports open on that matter [1], where clang developers acknowledge the non-conforming behavior, and refer to disabling it with -fno-merge-all-constants. But even if this gets fixed in clang today, there are already users out there that triggered this. Thus, fix this issue by explicitly adding -fno-merge-all-constants to the kernel's Makefile to generically disable this optimization, since potentially other places in the kernel could subtly break as well. Note, there is also a flag called -fmerge-constants (not supported by clang), which is more conservative and only applies to strings and it's enabled in gcc's -O/-O2/-O3/-Os optimization levels.
In gcc's code, the two flags -fmerge-{all-,}constants share the same variable internally, so when disabling it via -fno-merge-all-constants, then we really don't merge any const data (e.g. strings), and text size increases with gcc (14,927,214 -> 14,942,646 for vmlinux.o). $ gcc -fverbose-asm -O2 foo.c -S -o foo.S -> foo.S lists -fmerge-constants under options enabled $ gcc -fverbose-asm -O2 -fno-merge-all-constants foo.c -S -o foo.S -> foo.S doesn't list -fmerge-constants under options enabled $ gcc -fverbose-asm -O2 -fno-merge-all-constants -fmerge-constants foo.c -S -o foo.S -> foo.S lists -fmerge-constants under options enabled Thus, as a workaround we need to set both -fno-merge-all-constants *and* -fmerge-constants in the Makefile in order for text size to stay as is. [1] https://bugs.llvm.org/show_bug.cgi?id=18538 Reported-by: Prasad Sodagudi Signed-off-by: Daniel Borkmann Cc: Linus Torvalds Cc: Chenbo Feng Cc: Richard Smith Cc: Chandler Carruth Cc: linux-kernel@vger.kernel.org Tested-by: Prasad Sodagudi Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Makefile b/Makefile index df3b20af0fdb..db0de0d2e6c2 100644 --- a/Makefile +++ b/Makefile @@ -790,6 +790,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# clang sets -fmerge-all-constants by default as optimization, but this +# is non-conforming behavior for C and in fact breaks the kernel, so we +# need to disable it here generally. +KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants) + +# for gcc -fno-merge-all-constants disables everything, but it is fine +# to have actual conforming behavior enabled. 
+KBUILD_CFLAGS += $(call cc-option,-fmerge-constants) + # Make sure -fstack-check isn't enabled (like gentoo apparently did) KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) From 3eb88807b26daef1342d733d75956ad50d72b9d6 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Mon, 19 Mar 2018 17:57:27 -0700 Subject: [PATCH 66/68] bpf: skip unnecessary capability check commit 0fa4fe85f4724fff89b09741c437cbee9cf8b008 upstream. The current check statement in BPF syscall will do a capability check for CAP_SYS_ADMIN before checking sysctl_unprivileged_bpf_disabled. This code path will trigger unnecessary security hooks on capability checking and cause false alarms on unprivileged process trying to get CAP_SYS_ADMIN access. This can be resolved by simply switch the order of the statement and CAP_SYS_ADMIN is not required anyway if unprivileged bpf syscall is allowed. Signed-off-by: Chenbo Feng Acked-by: Lorenzo Colitti Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 91a2d3752007..f8b4e3e16cef 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -801,7 +801,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz union bpf_attr attr = {}; int err; - if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) + if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) return -EPERM; if (!access_ok(VERIFY_READ, uattr, 1)) From c9e307194fcdcb750e88a0014f9222d5a37b8bf5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 7 Mar 2018 22:10:01 +0100 Subject: [PATCH 67/68] bpf, x64: increase number of passes commit 6007b080d2e2adb7af22bf29165f0594ea12b34c upstream. 
In Cilium some of the main programs we run today are hitting 9 passes on x64's JIT compiler, and we've had cases already where we surpassed the limit where the JIT then punts the program to the interpreter instead, leading to insertion failures due to CONFIG_BPF_JIT_ALWAYS_ON or insertion failures due to the prog array owner being JITed but the program to insert not (both must have the same JITed/non-JITed property). One concrete case the program image shrunk from 12,767 bytes down to 10,288 bytes where the image converged after 16 steps. I've measured that this took 340us in the JIT until it converges on my i7-6600U. Thus, increase the original limit we had from day one where the JIT covered cBPF only back then before we run into the case (as similar with the complexity limit) where we trip over this and hit program rejections. Also add a cond_resched() into the compilation loop, the JIT process runs without any locks and may sleep anyway. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1f7ed2ed6ff7..cd9764520851 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1135,7 +1135,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * may converge on the last pass. 
In such case do one more * pass to emit the final image */ - for (pass = 0; pass < 10 || image; pass++) { + for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; @@ -1162,6 +1162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } } oldproglen = proglen; + cond_resched(); } if (bpf_jit_enable > 1) From c44cfe06dfe2a5f54527e87a48c92a6595d070cc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 28 Mar 2018 18:39:26 +0200 Subject: [PATCH 68/68] Linux 4.9.91 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index db0de0d2e6c2..db3d37e18723 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 90 +SUBLEVEL = 91 EXTRAVERSION = NAME = Roaring Lionus