From 6b2aaf302ce8baf8e5a41777e97a8d1ac7b9b8f5 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 6 Feb 2023 12:11:57 -0800 Subject: [PATCH 001/118] hv_netvsc: Allocate memory in netvsc_dma_map() with GFP_ATOMIC commit c6aa9d3b43cd11ac13a8220368a3b0483c6751d4 upstream. Memory allocations in the network transmit path must use GFP_ATOMIC so they won't sleep. Reported-by: Paolo Abeni Link: https://lore.kernel.org/lkml/8a4d08f94d3e6fe8b6da68440eaa89a088ad84f9.camel@redhat.com/ Fixes: 846da38de0e8 ("net: netvsc: Add Isolation VM support for netvsc driver") Cc: stable@vger.kernel.org Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1675714317-48577-1-git-send-email-mikelley@microsoft.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e02d1e3ef672..79f4e13620a4 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1034,7 +1034,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev, packet->dma_range = kcalloc(page_count, sizeof(*packet->dma_range), - GFP_KERNEL); + GFP_ATOMIC); if (!packet->dma_range) return -ENOMEM; From 66cf3a8273abcac9c52b088f87f926e1a699d959 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 18 Jan 2023 16:35:13 -0500 Subject: [PATCH 002/118] btrfs: limit device extents to the device size commit 3c538de0f2a74d50aff7278c092f88ae59cee688 upstream. There was a recent regression in btrfs/177 that started happening with the size class patches ("btrfs: introduce size class to block group allocator"). This however isn't a regression introduced by those patches, but rather the bug was uncovered by a change in behavior in these patches. The patches triggered more chunk allocations in the ^free-space-tree case, which uncovered a race with device shrink. The problem is we will set the device total size to the new size, and use this to find a hole for a device extent. However during shrink we may have device extents allocated past this range, so we could potentially find a hole in a range past our new shrink size. We don't actually limit our found extent to the device size anywhere, we assume that we will not find a hole past our device size. This isn't true with shrink as we're relocating block groups and thus creating holes past the device size. Fix this by making sure we do not search past the new device size, and if we wander into any device extents that start after our device size simply break from the loop and use whatever hole we've already found. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 65e4e887605f..f66bf1a88edf 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1612,7 +1612,7 @@ again: if (ret < 0) goto out; - while (1) { + while (search_start < search_end) { l = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(l)) { @@ -1635,6 +1635,9 @@ again: if (key.type != BTRFS_DEV_EXTENT_KEY) goto next; + if (key.offset > search_end) + break; + if (key.offset > search_start) { hole_size = key.offset - search_start; dev_extent_hole_check(device, &search_start, &hole_size, @@ -1695,6 +1698,7 @@ next: else ret = 0; + ASSERT(max_hole_start + max_hole_size <= search_end); out: btrfs_free_path(path); *start = max_hole_start; From 8ab575add354e48126591606262446fa1ee7ebd4 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 24 Jan 2023 12:32:34 +0100 Subject: [PATCH 003/118] btrfs: zlib: zero-initialize zlib workspace commit eadd7deca0ad8a83edb2b894d8326c78e78635d6 upstream. KMSAN reports uses of uninitialized memory in zlib's longest_match() called on memory originating from zlib_alloc_workspace(). This issue is known by zlib maintainers and is claimed to be harmless, but to be on the safe side we'd better initialize the memory. Link: https://zlib.net/zlib_faq.html#faq36 Reported-by: syzbot+14d9e7602ebdf7ec0a60@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Alexander Potapenko Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/zlib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b4f44662cda7..94d06a76edb1 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level) workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), zlib_inflate_workspacesize()); - workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL); + workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL); workspace->level = level; workspace->buf = NULL; /* From ab9b01a7acd56a21a64cd881e61c7a5369bd4602 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Tue, 7 Feb 2023 15:37:20 -0300 Subject: [PATCH 004/118] ALSA: hda/realtek: Add Positivo N14KP6-TG commit 88d18b8896bd98e636b632f805b7e84e61458255 upstream. Positivo N14KP6-TG (1c6c:1251) require quirk for enabling headset-mic Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20230207183720.2519-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index db9518de9343..ee5d3347354c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9701,6 +9701,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP), From 9d26f571df4de4b8991aeb30c4ccc7b0a30f0ec7 Mon Sep 17 00:00:00 2001 From: Artemii Karasev Date: Tue, 7 Feb 2023 18:20:26 +0500 Subject: [PATCH 005/118] ALSA: emux: Avoid potential array out-of-bound in snd_emux_xg_control() commit 6a32425f953b955b4ff82f339d01df0b713caa5d upstream. snd_emux_xg_control() can be called with an argument 'param' greater than size of 'control' array. It may lead to accessing 'control' array at a wrong index. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Artemii Karasev Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Link: https://lore.kernel.org/r/20230207132026.2870-1-karasev@ispras.ru Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux_nrpn.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/synth/emux/emux_nrpn.c b/sound/synth/emux/emux_nrpn.c index 8056422ed7c5..0d6b82ae2955 100644 --- a/sound/synth/emux/emux_nrpn.c +++ b/sound/synth/emux/emux_nrpn.c @@ -349,6 +349,9 @@ int snd_emux_xg_control(struct snd_emux_port *port, struct snd_midi_channel *chan, int param) { + if (param >= ARRAY_SIZE(chan->control)) + return -EINVAL; + return send_converted_effect(xg_effects, ARRAY_SIZE(xg_effects), port, chan, param, chan->control[param], From 49f4284ab3b52fd320b8ef1186b2287f791a71e3 Mon Sep 17 00:00:00 2001 From: Guillaume Pinot Date: Sun, 29 Jan 2023 18:13:38 +0100 Subject: [PATCH 006/118] ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book2 Pro 360 commit bd401fd730cbcb0717bbc5438f15084db10f9259 upstream. Samsung Galaxy Book2 Pro 360 (13" 2022 NP930QED-KA1FR) with codec SSID 144d:ca03 requires the same workaround for enabling the speaker amp like other Samsung models with ALC298 codec. Cc: Signed-off-by: Guillaume Pinot Link: https://lore.kernel.org/r/20230129171338.17249-1-texitoi@texitoi.eu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ee5d3347354c..a3a4e9e895cc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9523,6 +9523,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), From 566cad13beb62c9a2e9937becc9a4f4d5c12d257 Mon Sep 17 00:00:00 2001 From: Elvis Angelaccio Date: Sun, 5 Feb 2023 19:56:18 +0100 Subject: [PATCH 007/118] ALSA: hda/realtek: Enable mute/micmute LEDs on HP Elitebook, 645 G9 commit 9a6804aa1c92cd28e89e746ace44d5ba101db76c upstream. The HP Elitebook 645 G9 laptop (with motherboard model 89D2) uses the ALC236 codec and requires the alc236_fixup_hp_mute_led_micmute_vref fixup in order to enable mute/micmute LEDs. Note: the alc236_fixup_hp_gpio_led fixup, which is used by the Elitebook 640 G9, does not work with the 645 G9. [ rearranged the entry in SSID order -- tiwai ] Signed-off-by: Elvis Angelaccio Cc: Link: https://lore.kernel.org/r/4055cb48-e228-8a13-524d-afbb7aaafebe@kde.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a3a4e9e895cc..b21afed2bba2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9423,6 +9423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), From fdc304eef6e84153ca9a1e842c0c535085868fe0 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 6 Feb 2023 15:00:19 +0000 Subject: [PATCH 008/118] ALSA: hda/realtek: Add quirk for ASUS UM3402 using CS35L41 commit 7a17e8423a133a6ac238462126d7f88faaccc681 upstream. This Asus Zenbook laptop use Realtek HDA codec combined with 2xCS35L41 Amplifiers using I2C with External Boost. Signed-off-by: Stefan Binding Cc: Link: https://lore.kernel.org/r/20230206150019.3825120-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b21afed2bba2..d89fb50c227d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9481,6 +9481,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), + SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), From 63380b631f3dc04cf3139265732d860d870019f7 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Tue, 7 Feb 2023 16:30:09 +0800 Subject: [PATCH 009/118] ALSA: hda/realtek: fix mute/micmute LEDs don't work for a HP platform. commit 6c4715aa5b0ab1c0d35780b7c552e952dbb5515d upstream. There is a HP platform needs ALC236_FIXUP_HP_GPIO_LED quirk to make mic-mute/audio-mute working. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20230207083011.100189-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d89fb50c227d..1134a493d225 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9434,6 +9434,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From 3361eb2256f9e427c03a32f887519f06d192289a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 3 Feb 2023 13:57:29 -0600 Subject: [PATCH 010/118] Revert "PCI/ASPM: Save L1 PM Substates Capability for suspend/resume" commit a7152be79b627428c628da2a887ca4b2512a78fd upstream. This reverts commit 4ff116d0d5fd8a025604b0802d93a2d5f4e465d1. Tasev Nikola and Mark Enriquez reported that resume from suspend was broken in v6.1-rc1. Tasev bisected to a47126ec29f5 ("PCI/PTM: Cache PTM Capability offset"), but we can't figure out how that could be related. Mark saw the same symptoms and bisected to 4ff116d0d5fd ("PCI/ASPM: Save L1 PM Substates Capability for suspend/resume"), which does have a connection: it restores L1 Substates configuration while ASPM L1 may be enabled: pci_restore_state pci_restore_aspm_l1ss_state aspm_program_l1ss pci_write_config_dword(PCI_L1SS_CTL1, ctl1) # L1SS restore pci_restore_pcie_state pcie_capability_write_word(PCI_EXP_LNKCTL, cap[i++]) # L1 restore which is a problem because PCIe r6.0, sec 5.5.4, requires that: If setting either or both of the enable bits for ASPM L1 PM Substates, both ports must be configured as described in this section while ASPM L1 is disabled. Separately, Thomas Witt reported that 5e85eba6f50d ("PCI/ASPM: Refactor L1 PM Substates Control Register programming") broke suspend/resume, and it depends on 4ff116d0d5fd. Revert 4ff116d0d5fd ("PCI/ASPM: Save L1 PM Substates Capability for suspend/resume") to fix the resume issue and enable revert of 5e85eba6f50d to fix the issue Thomas reported. Note that reverting 4ff116d0d5fd means L1 Substates config may be lost on suspend/resume. As far as we know the system will use more power but will still *work* correctly. Fixes: 4ff116d0d5fd ("PCI/ASPM: Save L1 PM Substates Capability for suspend/resume") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216782 Link: https://bugzilla.kernel.org/show_bug.cgi?id=216877 Reported-by: Tasev Nikola Reported-by: Mark Enriquez Reported-by: Thomas Witt Tested-by: Mark Enriquez Tested-by: Thomas Witt Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v6.1+ Cc: Vidya Sagar Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 7 ------- drivers/pci/pci.h | 4 ---- drivers/pci/pcie/aspm.c | 37 ------------------------------------- 3 files changed, 48 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ab615ab4e440..6d81df459b2f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1665,7 +1665,6 @@ int pci_save_state(struct pci_dev *dev) return i; pci_save_ltr_state(dev); - pci_save_aspm_l1ss_state(dev); pci_save_dpc_state(dev); pci_save_aer_state(dev); pci_save_ptm_state(dev); @@ -1772,7 +1771,6 @@ void pci_restore_state(struct pci_dev *dev) * LTR itself (in the PCIe capability). */ pci_restore_ltr_state(dev); - pci_restore_aspm_l1ss_state(dev); pci_restore_pcie_state(dev); pci_restore_pasid_state(dev); @@ -3465,11 +3463,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) if (error) pci_err(dev, "unable to allocate suspend buffer for LTR\n"); - error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_L1SS, - 2 * sizeof(u32)); - if (error) - pci_err(dev, "unable to allocate suspend buffer for ASPM-L1SS\n"); - pci_allocate_vc_save_buffers(dev); } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b1ebb7ab8805..ce169b12a8f6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -565,14 +565,10 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active); void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); -void pci_save_aspm_l1ss_state(struct pci_dev *dev); -void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } -static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { } -static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { } #endif #ifdef CONFIG_PCIE_ECRC diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 53a1fa306e1e..915cbd939dd9 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -757,43 +757,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) PCI_L1SS_CTL1_L1SS_MASK, val); } -void pci_save_aspm_l1ss_state(struct pci_dev *dev) -{ - struct pci_cap_saved_state *save_state; - u16 l1ss = dev->l1ss; - u32 *cap; - - if (!l1ss) - return; - - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS); - if (!save_state) - return; - - cap = (u32 *)&save_state->cap.data[0]; - pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL2, cap++); - pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL1, cap++); -} - -void pci_restore_aspm_l1ss_state(struct pci_dev *dev) -{ - struct pci_cap_saved_state *save_state; - u32 *cap, ctl1, ctl2; - u16 l1ss = dev->l1ss; - - if (!l1ss) - return; - - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS); - if (!save_state) - return; - - cap = (u32 *)&save_state->cap.data[0]; - ctl2 = *cap++; - ctl1 = *cap; - aspm_program_l1ss(dev, ctl1, ctl2); -} - static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) { pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, From 5c4bd3f2f6ebb1e41e2df71d3f0a3450aa0bf102 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 3 Feb 2023 13:57:39 -0600 Subject: [PATCH 011/118] Revert "PCI/ASPM: Refactor L1 PM Substates Control Register programming" commit ff209ecc376a2ea8dd106a1f594427a5d94b7dd3 upstream. This reverts commit 5e85eba6f50dc288c22083a7e213152bcc4b8208. Thomas Witt reported that 5e85eba6f50d ("PCI/ASPM: Refactor L1 PM Substates Control Register programming") broke suspend/resume on a Tuxedo Infinitybook S 14 v5, which seems to use a Clevo L140CU Mainboard. The main symptom is: iwlwifi 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible nvme 0000:03:00.0: Unable to change power state from D3hot to D0, device inaccessible and the machine is only partially usable after resume. It can't run dmesg and can't do a clean reboot. This happens on every suspend/resume cycle. Revert 5e85eba6f50d until we can figure out the root cause. Fixes: 5e85eba6f50d ("PCI/ASPM: Refactor L1 PM Substates Control Register programming") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216877 Reported-by: Thomas Witt Tested-by: Thomas Witt Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v6.1+ Cc: Vidya Sagar Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 72 +++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 915cbd939dd9..4b4184563a92 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -470,31 +470,6 @@ static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos, pci_write_config_dword(pdev, pos, val); } -static void aspm_program_l1ss(struct pci_dev *dev, u32 ctl1, u32 ctl2) -{ - u16 l1ss = dev->l1ss; - u32 l1_2_enable; - - /* - * Per PCIe r6.0, sec 5.5.4, T_POWER_ON in PCI_L1SS_CTL2 must be - * programmed prior to setting the L1.2 enable bits in PCI_L1SS_CTL1. - */ - pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL2, ctl2); - - /* - * In addition, Common_Mode_Restore_Time and LTR_L1.2_THRESHOLD in - * PCI_L1SS_CTL1 must be programmed *before* setting the L1.2 - * enable bits, even though they're all in PCI_L1SS_CTL1. - */ - l1_2_enable = ctl1 & PCI_L1SS_CTL1_L1_2_MASK; - ctl1 &= ~PCI_L1SS_CTL1_L1_2_MASK; - - pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1, ctl1); - if (l1_2_enable) - pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1, - ctl1 | l1_2_enable); -} - /* Calculate L1.2 PM substate timing parameters */ static void aspm_calc_l1ss_info(struct pcie_link_state *link, u32 parent_l1ss_cap, u32 child_l1ss_cap) @@ -504,6 +479,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, u32 t_common_mode, t_power_on, l1_2_threshold, scale, value; u32 ctl1 = 0, ctl2 = 0; u32 pctl1, pctl2, cctl1, cctl2; + u32 pl1_2_enables, cl1_2_enables; if (!(link->aspm_support & ASPM_STATE_L1_2_MASK)) return; @@ -552,21 +528,39 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, ctl2 == pctl2 && ctl2 == cctl2) return; - pctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE); - pctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); - aspm_program_l1ss(parent, pctl1, ctl2); + /* Disable L1.2 while updating. See PCIe r5.0, sec 5.5.4, 7.8.3.3 */ + pl1_2_enables = pctl1 & PCI_L1SS_CTL1_L1_2_MASK; + cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK; - cctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE); - cctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); - aspm_program_l1ss(child, cctl1, ctl2); + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + } + + /* Program T_POWER_ON times in both ports */ + pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2); + pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2); + + /* Program Common_Mode_Restore_Time in upstream device */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); + + /* Program LTR_L1.2_THRESHOLD time in both ports */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0, + pl1_2_enables); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0, + cl1_2_enables); + } } static void aspm_l1ss_init(struct pcie_link_state *link) From 0a3e60b3fe492c2420b46629539542e6ccea1bcb Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 2 Feb 2023 18:23:09 +0000 Subject: [PATCH 012/118] tracing: Fix poll() and select() do not work on per_cpu trace_pipe and trace_pipe_raw commit 3e46d910d8acf94e5360126593b68bf4fee4c4a1 upstream. poll() and select() on per_cpu trace_pipe and trace_pipe_raw do not work since kernel 6.1-rc6. This issue is seen after the commit 42fb0a1e84ff525ebe560e2baf9451ab69127e2b ("tracing/ring-buffer: Have polling block on watermark"). This issue is firstly detected and reported, when testing the CXL error events in the rasdaemon and also erified using the test application for poll() and select(). This issue occurs for the per_cpu case, when calling the ring_buffer_poll_wait(), in kernel/trace/ring_buffer.c, with the buffer_percent > 0 and then wait until the percentage of pages are available. The default value set for the buffer_percent is 50 in the kernel/trace/trace.c. As a fix, allow userspace application could set buffer_percent as 0 through the buffer_percent_fops, so that the task will wake up as soon as data is added to any of the specific cpu buffer. Link: https://lore.kernel.org/linux-trace-kernel/20230202182309.742-2-shiju.jose@huawei.com Cc: Cc: Cc: Cc: stable@vger.kernel.org Fixes: 42fb0a1e84ff5 ("tracing/ring-buffer: Have polling block on watermark") Signed-off-by: Shiju Jose Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 546e84ae9993..a387bdc6af01 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9144,9 +9144,6 @@ buffer_percent_write(struct file *filp, const char __user *ubuf, if (val > 100) return -EINVAL; - if (!val) - val = 1; - tr->buffer_percent = val; (*ppos)++; From ad7e46427f9299a8e5976b124bf1d6cdd988c66c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 28 Jan 2023 17:47:50 +0000 Subject: [PATCH 013/118] of/address: Return an error when no valid dma-ranges are found commit f6933c01e42d2fc83b9133ed755609e4aac6eadd upstream. Commit 7a8b64d17e35 ("of/address: use range parser for of_dma_get_range") converted the parsing of dma-range properties to use code shared with the PCI range parser. The intent was to introduce no functional changes however in the case where we fail to translate the first resource instead of returning -EINVAL the new code we return 0. Restore the previous behaviour by returning an error if we find no valid ranges, the original code only handled the first range but subsequently support for parsing all supplied ranges was added. This avoids confusing code using the parsed ranges which doesn't expect to successfully parse ranges but have only a list terminator returned, this fixes breakage with so far as I can tell all DMA for on SoC devices on the Socionext Synquacer platform which has a firmware supplied DT. A bisect identified the original conversion as triggering the issues there. Fixes: 7a8b64d17e35 ("of/address: use range parser for of_dma_get_range") Signed-off-by: Mark Brown Cc: Luca Di Stefano Cc: 993612@bugs.debian.org Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230126-synquacer-boot-v2-1-cb80fd23c4e2@kernel.org Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index c34ac33b7338..67763e5b8c0e 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -965,8 +965,19 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) } of_dma_range_parser_init(&parser, node); - for_each_of_range(&parser, &range) + for_each_of_range(&parser, &range) { + if (range.cpu_addr == OF_BAD_ADDR) { + pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", + range.bus_addr, node); + continue; + } num_ranges++; + } + + if (!num_ranges) { + ret = -EINVAL; + goto out; + } r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL); if (!r) { @@ -975,18 +986,16 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) } /* - * Record all info in the generic DMA ranges array for struct device. + * Record all info in the generic DMA ranges array for struct device, + * returning an error if we don't find any parsable ranges. */ *map = r; of_dma_range_parser_init(&parser, node); for_each_of_range(&parser, &range) { pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", range.bus_addr, range.cpu_addr, range.size); - if (range.cpu_addr == OF_BAD_ADDR) { - pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", - range.bus_addr, node); + if (range.cpu_addr == OF_BAD_ADDR) continue; - } r->cpu_start = range.cpu_addr; r->dma_start = range.bus_addr; r->size = range.size; From c86e6d30c52227e6a02ff88d116b8dd9da172879 Mon Sep 17 00:00:00 2001 From: Devid Antonio Filoni Date: Fri, 25 Nov 2022 18:04:18 +0100 Subject: [PATCH 014/118] can: j1939: do not wait 250 ms if the same addr was already claimed commit 4ae5e1e97c44f4654516c1d41591a462ed62fa7b upstream. The ISO 11783-5 standard, in "4.5.2 - Address claim requirements", states: d) No CF shall begin, or resume, transmission on the network until 250 ms after it has successfully claimed an address except when responding to a request for address-claimed. But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim prioritization" show that the CF begins the transmission after 250 ms from the first AC (address-claimed) message even if it sends another AC message during that time window to resolve the address contention with another CF. As stated in "4.4.2.3 - Address-claimed message": In order to successfully claim an address, the CF sending an address claimed message shall not receive a contending claim from another CF for at least 250 ms. As stated in "4.4.3.2 - NAME management (NM) message": 1) A commanding CF can d) request that a CF with a specified NAME transmit the address- claimed message with its current NAME. 2) A target CF shall d) send an address-claimed message in response to a request for a matching NAME Taking the above arguments into account, the 250 ms wait is requested only during network initialization. Do not restart the timer on AC message if both the NAME and the address match and so if the address has already been claimed (timer has expired) or the AC message has been sent to resolve the contention with another CF (timer is still running). Signed-off-by: Devid Antonio Filoni Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20221125170418.34575-1-devid.filoni@egluetechnologies.com Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/address-claim.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c index f33c47327927..ca4ad6cdd5cb 100644 --- a/net/can/j1939/address-claim.c +++ b/net/can/j1939/address-claim.c @@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) * leaving this function. */ ecu = j1939_ecu_get_by_name_locked(priv, name); + + if (ecu && ecu->addr == skcb->addr.sa) { + /* The ISO 11783-5 standard, in "4.5.2 - Address claim + * requirements", states: + * d) No CF shall begin, or resume, transmission on the + * network until 250 ms after it has successfully claimed + * an address except when responding to a request for + * address-claimed. + * + * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim + * prioritization" show that the CF begins the transmission + * after 250 ms from the first AC (address-claimed) message + * even if it sends another AC message during that time window + * to resolve the address contention with another CF. + * + * As stated in "4.4.2.3 - Address-claimed message": + * In order to successfully claim an address, the CF sending + * an address claimed message shall not receive a contending + * claim from another CF for at least 250 ms. + * + * As stated in "4.4.3.2 - NAME management (NM) message": + * 1) A commanding CF can + * d) request that a CF with a specified NAME transmit + * the address-claimed message with its current NAME. + * 2) A target CF shall + * d) send an address-claimed message in response to a + * request for a matching NAME + * + * Taking the above arguments into account, the 250 ms wait is + * requested only during network initialization. + * + * Do not restart the timer on AC message if both the NAME and + * the address match and so if the address has already been + * claimed (timer has expired) or the AC message has been sent + * to resolve the contention with another CF (timer is still + * running). + */ + goto out_ecu_put; + } + if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) ecu = j1939_ecu_create_locked(priv, name); From 3fc0b7b7e9a571426613ea678b5a8c5efe8c8f52 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Fri, 3 Feb 2023 11:18:00 +0100 Subject: [PATCH 015/118] HID: logitech: Disable hi-res scrolling on USB commit 690eb7dec72ae52d1d710d14a451844b4d0f4f19 upstream. On some Logitech mice, such as the G903, and possibly the G403, the HID events are generated on a different interface to the HID++ one. If we enable hi-res through the HID++ interface, the HID interface wouldn't know anything about it, and handle the events as if they were regular scroll events, making the mouse unusable. Disable hi-res scrolling on those devices until we implement scroll events through HID++. Signed-off-by: Bastien Nocera Tested-by: Tobias Klausmann Link: https://bugzilla.kernel.org/show_bug.cgi?id=216885 Fixes: 908d325e1665 ("HID: logitech-hidpp: Detect hi-res scrolling support") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230203101800.139380-1-hadess@hadess.net Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-logitech-hidpp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 656757c79f6b..07b8506eecc4 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3978,7 +3978,8 @@ static void hidpp_connect_event(struct hidpp_device *hidpp) } hidpp_initialize_battery(hidpp); - hidpp_initialize_hires_scroll(hidpp); + if (!hid_is_usb(hidpp->hid_dev)) + hidpp_initialize_hires_scroll(hidpp); /* forward current battery state */ if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP10_BATTERY) { From cf118814ae89f4ae702fd376089cdac296915877 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Tue, 10 Jan 2023 12:14:50 +0300 Subject: [PATCH 016/118] xfrm: compat: change expression for switch in xfrm_xlate64 [ Upstream commit eb6c59b735aa6cca77cdbb59cc69d69a0d63d986 ] Compare XFRM_MSG_NEWSPDINFO (value from netlink configuration messages enum) with nlh_src->nlmsg_type instead of nlh_src->nlmsg_type - XFRM_MSG_BASE. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 4e9505064f58 ("net/xfrm/compat: Copy xfrm_spdattr_type_t atributes") Signed-off-by: Anastasia Belova Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Tested-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a0f62fa02e06..12405aa5bce8 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -302,7 +302,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) nla_for_each_attr(nla, attrs, len, remaining) { int err; - switch (type) { + switch (nlh_src->nlmsg_type) { case XFRM_MSG_NEWSPDINFO: err = xfrm_nla_cpy(dst, nla, nla_len(nla)); break; From 0a4f811f2e5d07bbd0c9226f4afb0a1270a831ae Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 12 Jan 2023 13:16:02 -0500 Subject: [PATCH 017/118] IB/hfi1: Restore allocated resources on failed copyout [ Upstream commit 6601fc0d15ffc20654e39486f9bef35567106d68 ] Fix a resource leak if an error occurs. Fixes: f404ca4c7ea8 ("IB/hfi1: Refactor hfi_user_exp_rcv_setup() IOCTL") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167354736291.2132367.10894218740150168180.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/file_ops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f5f9269fdc16..7c5d487ec916 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1318,12 +1318,15 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) - return -EFAULT; + ret = -EFAULT; addr = arg + offsetof(struct hfi1_tid_info, length); - if (copy_to_user((void __user *)addr, &tinfo.length, + if (!ret && copy_to_user((void __user *)addr, &tinfo.length, sizeof(tinfo.length))) ret = -EFAULT; + + if (ret) + hfi1_user_exp_rcv_invalid(fd, &tinfo); } return ret; From 419674224390fca298020fc0751a20812f84b12d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 13:02:49 +0000 Subject: [PATCH 018/118] xfrm/compat: prevent potential spectre v1 gadget in xfrm_xlate32_attr() [ Upstream commit b6ee896385380aa621102e8ea402ba12db1cabff ] int type = nla_type(nla); if (type > XFRMA_MAX) { return -EOPNOTSUPP; } @type is then used as an array index and can be used as a Spectre v1 gadget. if (nla_len(nla) < compat_policy[type].len) { array_index_nospec() can be used to prevent leaking content of kernel memory to malicious users. Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Signed-off-by: Eric Dumazet Cc: Dmitry Safonov Cc: Steffen Klassert Reviewed-by: Dmitry Safonov Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_compat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 12405aa5bce8..8cbf45a8bcdc 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -5,6 +5,7 @@ * Based on code and translator idea by: Florian Westphal */ #include +#include #include #include @@ -437,6 +438,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } + type = array_index_nospec(type, XFRMA_MAX + 1); if (nla_len(nla) < compat_policy[type].len) { NL_SET_ERR_MSG(extack, "Attribute bad length"); return -EOPNOTSUPP; From 7197460dcd43ff0e4a502ba855dd82d37c2848cc Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 24 Jan 2023 20:24:18 +0200 Subject: [PATCH 019/118] IB/IPoIB: Fix legacy IPoIB due to wrong number of queues [ Upstream commit e632291a2dbce45a24cddeb5fe28fe71d724ba43 ] The cited commit creates child PKEY interfaces over netlink will multiple tx and rx queues, but some devices doesn't support more than 1 tx and 1 rx queues. This causes to a crash when traffic is sent over the PKEY interface due to the parent having a single queue but the child having multiple queues. This patch fixes the number of queues to 1 for legacy IPoIB at the earliest possible point in time. BUG: kernel NULL pointer dereference, address: 000000000000036b PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 4 PID: 209665 Comm: python3 Not tainted 6.1.0_for_upstream_min_debug_2022_12_12_17_02 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:kmem_cache_alloc+0xcb/0x450 Code: ce 7e 49 8b 50 08 49 83 78 10 00 4d 8b 28 0f 84 cb 02 00 00 4d 85 ed 0f 84 c2 02 00 00 41 8b 44 24 28 48 8d 4a 01 49 8b 3c 24 <49> 8b 5c 05 00 4c 89 e8 65 48 0f c7 0f 0f 94 c0 84 c0 74 b8 41 8b RSP: 0018:ffff88822acbbab8 EFLAGS: 00010202 RAX: 0000000000000070 RBX: ffff8881c28e3e00 RCX: 00000000064f8dae RDX: 00000000064f8dad RSI: 0000000000000a20 RDI: 0000000000030d00 RBP: 0000000000000a20 R08: ffff8882f5d30d00 R09: ffff888104032f40 R10: ffff88810fade828 R11: 736f6d6570736575 R12: ffff88810081c000 R13: 00000000000002fb R14: ffffffff817fc865 R15: 0000000000000000 FS: 00007f9324ff9700(0000) GS:ffff8882f5d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000036b CR3: 00000001125af004 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_clone+0x55/0xd0 ip6_finish_output2+0x3fe/0x690 ip6_finish_output+0xfa/0x310 ip6_send_skb+0x1e/0x60 udp_v6_send_skb+0x1e5/0x420 udpv6_sendmsg+0xb3c/0xe60 ? ip_mc_finish_output+0x180/0x180 ? __switch_to_asm+0x3a/0x60 ? __switch_to_asm+0x34/0x60 sock_sendmsg+0x33/0x40 __sys_sendto+0x103/0x160 ? _copy_to_user+0x21/0x30 ? kvm_clock_get_cycles+0xd/0x10 ? ktime_get_ts64+0x49/0xe0 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f9374f1ed14 Code: 42 41 f8 ff 44 8b 4c 24 2c 4c 8b 44 24 20 89 c5 44 8b 54 24 28 48 8b 54 24 18 b8 2c 00 00 00 48 8b 74 24 10 8b 7c 24 08 0f 05 <48> 3d 00 f0 ff ff 77 34 89 ef 48 89 44 24 08 e8 68 41 f8 ff 48 8b RSP: 002b:00007f9324ff7bd0 EFLAGS: 00000293 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f9324ff7cc8 RCX: 00007f9374f1ed14 RDX: 00000000000002fb RSI: 00007f93000052f0 RDI: 0000000000000030 RBP: 0000000000000000 R08: 00007f9324ff7d40 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000 R13: 000000012a05f200 R14: 0000000000000001 R15: 00007f9374d57bdc Fixes: dbc94a0fb817 ("IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces") Signed-off-by: Dragos Tatulea Link: https://lore.kernel.org/r/95eb6b74c7cf49fa46281f9d056d685c9fa11d38.1674584576.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ac25fc80fb33..f10d4bcf87d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2200,6 +2200,14 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name, rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; rn->hca = hca; + + rc = netif_set_real_num_tx_queues(dev, 1); + if (rc) + goto out; + + rc = netif_set_real_num_rx_queues(dev, 1); + if (rc) + goto out; } priv->rn_ops = dev->netdev_ops; From f6d8b6762bee15551e538eef021b93477634d38f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jan 2023 11:21:30 +0000 Subject: [PATCH 020/118] xfrm: annotate data-race around use_time [ Upstream commit 0a9e5794b21e2d1303759ff8fe5f9215db7757ba ] KCSAN reported multiple cpus can update use_time at the same time. Adds READ_ONCE()/WRITE_ONCE() annotations. Note that 32bit arches are not fully protected, but they will probably no longer be supported/used in 2106. BUG: KCSAN: data-race in __xfrm_policy_check / __xfrm_policy_check write to 0xffff88813e7ec108 of 8 bytes by interrupt on cpu 0: __xfrm_policy_check+0x6ae/0x17f0 net/xfrm/xfrm_policy.c:3664 __xfrm_policy_check2 include/net/xfrm.h:1174 [inline] xfrm_policy_check include/net/xfrm.h:1179 [inline] xfrm6_policy_check+0x2e9/0x320 include/net/xfrm.h:1189 udpv6_queue_rcv_one_skb+0x48/0xa30 net/ipv6/udp.c:703 udpv6_queue_rcv_skb+0x2d6/0x310 net/ipv6/udp.c:792 udp6_unicast_rcv_skb+0x16b/0x190 net/ipv6/udp.c:935 __udp6_lib_rcv+0x84b/0x9b0 net/ipv6/udp.c:1020 udpv6_rcv+0x4b/0x50 net/ipv6/udp.c:1133 ip6_protocol_deliver_rcu+0x99e/0x1020 net/ipv6/ip6_input.c:439 ip6_input_finish net/ipv6/ip6_input.c:484 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] ip6_input+0xca/0x180 net/ipv6/ip6_input.c:493 dst_input include/net/dst.h:454 [inline] ip6_rcv_finish+0x1e9/0x2d0 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:302 [inline] ipv6_rcv+0x85/0x140 net/ipv6/ip6_input.c:309 __netif_receive_skb_one_core net/core/dev.c:5482 [inline] __netif_receive_skb+0x8b/0x1b0 net/core/dev.c:5596 process_backlog+0x23f/0x3b0 net/core/dev.c:5924 __napi_poll+0x65/0x390 net/core/dev.c:6485 napi_poll net/core/dev.c:6552 [inline] net_rx_action+0x37e/0x730 net/core/dev.c:6663 __do_softirq+0xf2/0x2c7 kernel/softirq.c:571 do_softirq+0xb1/0xf0 kernel/softirq.c:472 __local_bh_enable_ip+0x6f/0x80 kernel/softirq.c:396 __raw_read_unlock_bh include/linux/rwlock_api_smp.h:257 [inline] _raw_read_unlock_bh+0x17/0x20 kernel/locking/spinlock.c:284 wg_socket_send_skb_to_peer+0x107/0x120 drivers/net/wireguard/socket.c:184 wg_packet_create_data_done drivers/net/wireguard/send.c:251 [inline] wg_packet_tx_worker+0x142/0x360 drivers/net/wireguard/send.c:276 process_one_work+0x3d3/0x720 kernel/workqueue.c:2289 worker_thread+0x618/0xa70 kernel/workqueue.c:2436 kthread+0x1a9/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 write to 0xffff88813e7ec108 of 8 bytes by interrupt on cpu 1: __xfrm_policy_check+0x6ae/0x17f0 net/xfrm/xfrm_policy.c:3664 __xfrm_policy_check2 include/net/xfrm.h:1174 [inline] xfrm_policy_check include/net/xfrm.h:1179 [inline] xfrm6_policy_check+0x2e9/0x320 include/net/xfrm.h:1189 udpv6_queue_rcv_one_skb+0x48/0xa30 net/ipv6/udp.c:703 udpv6_queue_rcv_skb+0x2d6/0x310 net/ipv6/udp.c:792 udp6_unicast_rcv_skb+0x16b/0x190 net/ipv6/udp.c:935 __udp6_lib_rcv+0x84b/0x9b0 net/ipv6/udp.c:1020 udpv6_rcv+0x4b/0x50 net/ipv6/udp.c:1133 ip6_protocol_deliver_rcu+0x99e/0x1020 net/ipv6/ip6_input.c:439 ip6_input_finish net/ipv6/ip6_input.c:484 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] ip6_input+0xca/0x180 net/ipv6/ip6_input.c:493 dst_input include/net/dst.h:454 [inline] ip6_rcv_finish+0x1e9/0x2d0 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:302 [inline] ipv6_rcv+0x85/0x140 net/ipv6/ip6_input.c:309 __netif_receive_skb_one_core net/core/dev.c:5482 [inline] __netif_receive_skb+0x8b/0x1b0 net/core/dev.c:5596 process_backlog+0x23f/0x3b0 net/core/dev.c:5924 __napi_poll+0x65/0x390 net/core/dev.c:6485 napi_poll net/core/dev.c:6552 [inline] net_rx_action+0x37e/0x730 net/core/dev.c:6663 __do_softirq+0xf2/0x2c7 kernel/softirq.c:571 do_softirq+0xb1/0xf0 kernel/softirq.c:472 __local_bh_enable_ip+0x6f/0x80 kernel/softirq.c:396 __raw_read_unlock_bh include/linux/rwlock_api_smp.h:257 [inline] _raw_read_unlock_bh+0x17/0x20 kernel/locking/spinlock.c:284 wg_socket_send_skb_to_peer+0x107/0x120 drivers/net/wireguard/socket.c:184 wg_packet_create_data_done drivers/net/wireguard/send.c:251 [inline] wg_packet_tx_worker+0x142/0x360 drivers/net/wireguard/send.c:276 process_one_work+0x3d3/0x720 kernel/workqueue.c:2289 worker_thread+0x618/0xa70 kernel/workqueue.c:2436 kthread+0x1a9/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 value changed: 0x0000000063c62d6f -> 0x0000000063c62d70 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 4185 Comm: kworker/1:2 Tainted: G W 6.2.0-rc4-syzkaller-00009-gd532dd102151-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: wg-crypt-wg0 wg_packet_tx_worker Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Cc: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_policy.c | 11 +++++++---- net/xfrm/xfrm_state.c | 10 +++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e392d8d05e0c..52538d536067 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -336,7 +336,7 @@ static void xfrm_policy_timer(struct timer_list *t) } if (xp->lft.hard_use_expires_seconds) { time64_t tmo = xp->lft.hard_use_expires_seconds + - (xp->curlft.use_time ? : xp->curlft.add_time) - now; + (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now; if (tmo <= 0) goto expired; if (tmo < next) @@ -354,7 +354,7 @@ static void xfrm_policy_timer(struct timer_list *t) } if (xp->lft.soft_use_expires_seconds) { time64_t tmo = xp->lft.soft_use_expires_seconds + - (xp->curlft.use_time ? : xp->curlft.add_time) - now; + (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now; if (tmo <= 0) { warn = 1; tmo = XFRM_KM_TIMEOUT; @@ -3586,7 +3586,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, return 1; } - pol->curlft.use_time = ktime_get_real_seconds(); + /* This lockless write can happen from different cpus. */ + WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds()); pols[0] = pol; npols++; @@ -3601,7 +3602,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, xfrm_pol_put(pols[0]); return 0; } - pols[1]->curlft.use_time = ktime_get_real_seconds(); + /* This write can happen from different cpus. */ + WRITE_ONCE(pols[1]->curlft.use_time, + ktime_get_real_seconds()); npols++; } } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3d2fe7712ac5..0f88cb6fc3c2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -572,7 +572,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) } if (x->lft.hard_use_expires_seconds) { long tmo = x->lft.hard_use_expires_seconds + - (x->curlft.use_time ? : now) - now; + (READ_ONCE(x->curlft.use_time) ? : now) - now; if (tmo <= 0) goto expired; if (tmo < next) @@ -594,7 +594,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + - (x->curlft.use_time ? : now) - now; + (READ_ONCE(x->curlft.use_time) ? : now) - now; if (tmo <= 0) warn = 1; else if (tmo < next) @@ -1754,7 +1754,7 @@ out: hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); - if (x1->curlft.use_time) + if (READ_ONCE(x1->curlft.use_time)) xfrm_state_check_expire(x1); if (x->props.smark.m || x->props.smark.v || x->if_id) { @@ -1786,8 +1786,8 @@ EXPORT_SYMBOL(xfrm_state_update); int xfrm_state_check_expire(struct xfrm_state *x) { - if (!x->curlft.use_time) - x->curlft.use_time = ktime_get_real_seconds(); + if (!READ_ONCE(x->curlft.use_time)) + WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds()); if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { From 360682fe7df262d94fae54f737c487bec0f9190d Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 26 Jan 2023 10:52:30 -0800 Subject: [PATCH 021/118] RDMA/irdma: Fix potential NULL-ptr-dereference [ Upstream commit 5d9745cead1f121974322b94ceadfb4d1e67960e ] in_dev_get() can return NULL which will cause a failure once idev is dereferenced in in_dev_for_each_ifa_rtnl(). This patch adds a check for NULL value in idev beforehand. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20230126185230.62464-1-n.zhandarovich@fintech.ru Reviewed-by: Sindhu Devale Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 7b086fe63a24..195aa9ea18b6 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1722,6 +1722,9 @@ static int irdma_add_mqh_4(struct irdma_device *iwdev, continue; idev = in_dev_get(ip_dev); + if (!idev) + continue; + in_dev_for_each_ifa_rtnl(ifa, idev) { ibdev_dbg(&iwdev->ibdev, "CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n", From d53903cc2396e0c18ab5e60f47f7cf7c10f5e492 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sun, 29 Jan 2023 17:37:57 +0800 Subject: [PATCH 022/118] RDMA/usnic: use iommu_map_atomic() under spin_lock() [ Upstream commit b7e08a5a63a11627601915473c3b569c1f6c6c06 ] usnic_uiom_map_sorted_intervals() is called under spin_lock(), iommu_map() might sleep, use iommu_map_atomic() to avoid potential sleep in atomic context. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20230129093757.637354-1-yangyingliang@huawei.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/usnic/usnic_uiom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 67923ced6e2d..b343f6f1bda5 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -277,8 +277,8 @@ iter_chunk: size = pa_end - pa_start + PAGE_SIZE; usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", va_start, &pa_start, size, flags); - err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + err = iommu_map_atomic(pd->domain, va_start, + pa_start, size, flags); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); @@ -294,8 +294,8 @@ iter_chunk: size = pa - pa_start + PAGE_SIZE; usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", va_start, &pa_start, size, flags); - err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + err = iommu_map_atomic(pd->domain, va_start, + pa_start, size, flags); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); From 5954eaab6bb70aff29fbdc9ae991ed68c7b7331f Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 26 Jan 2023 11:33:50 -0500 Subject: [PATCH 023/118] xfrm: fix bug with DSCP copy to v6 from v4 tunnel [ Upstream commit 6028da3f125fec34425dbd5fec18e85d372b2af6 ] When copying the DSCP bits for decap-dscp into IPv6 don't assume the outer encap is always IPv6. Instead, as with the inner IPv4 case, copy the DSCP bits from the correctly saved "tos" value in the control block. Fixes: 227620e29509 ("[IPSEC]: Separate inner/outer mode processing on input") Signed-off-by: Christian Hopps Acked-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 97074f6f2bde..2defd89da700 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -279,8 +279,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), - ipipv6_hdr(skb)); + ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb)); if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); From a7f0ec26cd67e7ad6e56753694fddb67a9730f27 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Wed, 1 Feb 2023 10:22:47 -0600 Subject: [PATCH 024/118] of: Make OF framebuffer device names unique [ Upstream commit 241d2fb56a18473af5f2ff0d512992a996eb64dd ] Since Linux 5.19 this error is observed: sysfs: cannot create duplicate filename '/devices/platform/of-display' This is because multiple devices with the same name 'of-display' are created on the same bus. Update the code to create numbered device names for the displays. Also, fix a node refcounting issue when exiting the boot display loop. cc: linuxppc-dev@lists.ozlabs.org References: https://bugzilla.kernel.org/show_bug.cgi?id=216095 Fixes: 52b1b46c39ae ("of: Create platform devices for OF framebuffers") Reported-by: Erhard F. Suggested-by: Thomas Zimmermann Signed-off-by: Michal Suchanek Link: https://lore.kernel.org/r/20230201162247.3575506-1-robh@kernel.org [robh: Rework to avoid node refcount leaks] Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/platform.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 3507095a69f6..6e93fd37ccd1 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -526,6 +526,7 @@ static int __init of_platform_default_populate_init(void) if (IS_ENABLED(CONFIG_PPC)) { struct device_node *boot_display = NULL; struct platform_device *dev; + int display_number = 0; int ret; /* Check if we have a MacOS display without a node spec */ @@ -556,16 +557,23 @@ static int __init of_platform_default_populate_init(void) if (!of_get_property(node, "linux,opened", NULL) || !of_get_property(node, "linux,boot-display", NULL)) continue; - dev = of_platform_device_create(node, "of-display", NULL); + dev = of_platform_device_create(node, "of-display.0", NULL); + of_node_put(node); if (WARN_ON(!dev)) return -ENOMEM; boot_display = node; + display_number++; break; } for_each_node_by_type(node, "display") { + char buf[14]; + const char *of_display_format = "of-display.%d"; + if (!of_get_property(node, "linux,opened", NULL) || node == boot_display) continue; - of_platform_device_create(node, "of-display", NULL); + ret = snprintf(buf, sizeof(buf), of_display_format, display_number++); + if (ret < sizeof(buf)) + of_platform_device_create(node, buf, NULL); } } else { From a65723e299dc0fea45d8160590f8c0e109405579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 31 Jan 2023 11:02:42 +0100 Subject: [PATCH 025/118] net: phylink: move phy_device_free() to correctly release phy device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ce93fdb5f2ca5c9e2a9668411cc39091507f8dc9 ] After calling fwnode_phy_find_device(), the phy device refcount is incremented. Then, when the phy device is attached to a netdev with phy_attach_direct(), the refcount is also incremented but only decremented in the caller if phy_attach_direct() fails. Move phy_device_free() before the "if" to always release it correctly. Indeed, either phy_attach_direct() failed and we don't want to keep a reference to the phydev or it succeeded and a reference has been taken internally. Fixes: 25396f680dd6 ("net: phylink: introduce phylink_fwnode_phy_connect()") Signed-off-by: Clément Léger Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phylink.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2805b04d6402..a202ce6611fd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1793,10 +1793,9 @@ int phylink_fwnode_phy_connect(struct phylink *pl, ret = phy_attach_direct(pl->netdev, phy_dev, flags, pl->link_interface); - if (ret) { - phy_device_free(phy_dev); + phy_device_free(phy_dev); + if (ret) return ret; - } ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface); if (ret) From d974330c1d9915ae0940828df779fa8b0cfbb80c Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 2 Feb 2023 17:32:55 +0800 Subject: [PATCH 026/118] bonding: fix error checking in bond_debug_reregister() [ Upstream commit cbe83191d40d8925b7a99969d037d2a0caf69294 ] Since commit ff9fb72bc077 ("debugfs: return error values, not NULL") changed return value of debugfs_rename() in error cases from %NULL to %ERR_PTR(-ERROR), we should also check error values instead of NULL. Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Signed-off-by: Qi Zheng Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20230202093256.32458-1-zhengqi.arch@bytedance.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 4f9b4a18c74c..594094526648 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -76,7 +76,7 @@ void bond_debug_reregister(struct bonding *bond) d = debugfs_rename(bonding_debug_root, bond->debug_dir, bonding_debug_root, bond->dev->name); - if (d) { + if (!IS_ERR(d)) { bond->debug_dir = d; } else { netdev_warn(bond->dev, "failed to reregister, so just unregister old one\n"); From 9c6a896b8d40da37ac22ee2ef821b241698977f6 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 2 Feb 2023 17:56:19 +0530 Subject: [PATCH 027/118] net: macb: Perform zynqmp dynamic configuration only for SGMII interface [ Upstream commit c9011b028e956c3b6baa6f131d9eec43e4e52020 ] In zynqmp platforms where firmware supports dynamic SGMII configuration but has other non-SGMII ethernet devices, it fails them with no packets received at the RX interface. To fix this behaviour perform SGMII dynamic configuration only for the SGMII phy interface. Fixes: 32cee7818111 ("net: macb: Add zynqmp SGMII dynamic configuration support") Signed-off-by: Radhey Shyam Pandey Reviewed-by: Jiri Pirko Reported-by: Michal Simek Tested-by: Michal Simek Reviewed-by: Claudiu Beznea Link: https://lore.kernel.org/r/1675340779-27499-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 31 ++++++++++++------------ 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 300f47ca42e3..e255780f3867 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4614,25 +4614,26 @@ static int init_reset_optional(struct platform_device *pdev) if (ret) return dev_err_probe(&pdev->dev, ret, "failed to init SGMII PHY\n"); - } - ret = zynqmp_pm_is_function_supported(PM_IOCTL, IOCTL_SET_GEM_CONFIG); - if (!ret) { - u32 pm_info[2]; + ret = zynqmp_pm_is_function_supported(PM_IOCTL, IOCTL_SET_GEM_CONFIG); + if (!ret) { + u32 pm_info[2]; - ret = of_property_read_u32_array(pdev->dev.of_node, "power-domains", - pm_info, ARRAY_SIZE(pm_info)); - if (ret) { - dev_err(&pdev->dev, "Failed to read power management information\n"); - goto err_out_phy_exit; + ret = of_property_read_u32_array(pdev->dev.of_node, "power-domains", + pm_info, ARRAY_SIZE(pm_info)); + if (ret) { + dev_err(&pdev->dev, "Failed to read power management information\n"); + goto err_out_phy_exit; + } + ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_FIXED, 0); + if (ret) + goto err_out_phy_exit; + + ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_SGMII_MODE, 1); + if (ret) + goto err_out_phy_exit; } - ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_FIXED, 0); - if (ret) - goto err_out_phy_exit; - ret = zynqmp_pm_set_gem_config(pm_info[1], GEM_CONFIG_SGMII_MODE, 1); - if (ret) - goto err_out_phy_exit; } /* Fully reset controller at hardware level if mapped in device tree */ From 95e29c11abe566e814aec9802581916def981a41 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 2 Feb 2023 21:45:36 +0100 Subject: [PATCH 028/118] net: phy: meson-gxl: use MMD access dummy stubs for GXL, internal PHY [ Upstream commit 69ff53e4a4c9498eeed7d1441f68a1481dc69251 ] Jerome provided the information that also the GXL internal PHY doesn't support MMD register access and EEE. MMD reads return 0xffff, what results in e.g. completely wrong ethtool --show-eee output. Therefore use the MMD dummy stubs. Fixes: d853d145ea3e ("net: phy: add an option to disable EEE advertisement") Suggested-by: Jerome Brunet Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/84432fe4-0be4-bc82-4e5c-557206b40f56@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/meson-gxl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index 5e41658b1e2f..a6015cd03bff 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -261,6 +261,8 @@ static struct phy_driver meson_gxl_phy[] = { .handle_interrupt = meson_gxl_handle_interrupt, .suspend = genphy_suspend, .resume = genphy_resume, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, { PHY_ID_MATCH_EXACT(0x01803301), .name = "Meson G12A Internal PHY", From db34b5352e92155cdb8f4566469343526b2eeb1b Mon Sep 17 00:00:00 2001 From: Neel Patel Date: Thu, 2 Feb 2023 13:55:35 -0800 Subject: [PATCH 029/118] ionic: clean interrupt before enabling queue to avoid credit race [ Upstream commit e8797a058466b60fc5a3291b92430c93ba90eaff ] Clear the interrupt credits before enabling the queue rather than after to be sure that the enabled queue starts at 0 and that we don't wipe away possible credits after enabling the queue. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Neel Patel Signed-off-by: Shannon Nelson Reviewed-by: Leon Romanovsky Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 19d4848df17d..147e23435c3d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -269,6 +269,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) .oper = IONIC_Q_ENABLE, }, }; + int ret; idev = &lif->ionic->idev; dev = lif->ionic->dev; @@ -276,16 +277,24 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n", ctx.cmd.q_control.index, ctx.cmd.q_control.type); + if (qcq->flags & IONIC_QCQ_F_INTR) + ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); + + ret = ionic_adminq_post_wait(lif, &ctx); + if (ret) + return ret; + + if (qcq->napi.poll) + napi_enable(&qcq->napi); + if (qcq->flags & IONIC_QCQ_F_INTR) { irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); - napi_enable(&qcq->napi); - ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_CLEAR); } - return ionic_adminq_post_wait(lif, &ctx); + return 0; } static int ionic_qcq_disable(struct ionic_lif *lif, struct ionic_qcq *qcq, int fw_err) From a16f6ea194fe0ea24c05fa51c46cd1dc38c7380d Mon Sep 17 00:00:00 2001 From: Neel Patel Date: Wed, 26 Oct 2022 07:37:44 -0700 Subject: [PATCH 030/118] ionic: refactor use of ionic_rx_fill() [ Upstream commit e55f0f5befc26e2ba6bb8c1f945ea8e37ee0e334 ] The same pre-work code is used before each call to ionic_rx_fill(), so bring it in and make it a part of the routine. Signed-off-by: Neel Patel Signed-off-by: Shannon Nelson Signed-off-by: Jakub Kicinski Stable-dep-of: b69585bfcece ("ionic: missed doorbell workaround") Signed-off-by: Sasha Levin --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index c03986bf2628..7977de4d67b7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -348,16 +348,25 @@ void ionic_rx_fill(struct ionic_queue *q) struct ionic_rxq_sg_desc *sg_desc; struct ionic_rxq_sg_elem *sg_elem; struct ionic_buf_info *buf_info; + unsigned int fill_threshold; struct ionic_rxq_desc *desc; unsigned int remain_len; unsigned int frag_len; unsigned int nfrags; + unsigned int n_fill; unsigned int i, j; unsigned int len; + n_fill = ionic_q_space_avail(q); + + fill_threshold = min_t(unsigned int, IONIC_RX_FILL_THRESHOLD, + q->num_descs / IONIC_RX_FILL_DIV); + if (n_fill < fill_threshold) + return; + len = netdev->mtu + ETH_HLEN + VLAN_HLEN; - for (i = ionic_q_space_avail(q); i; i--) { + for (i = n_fill; i; i--) { nfrags = 0; remain_len = len; desc_info = &q->info[q->head_idx]; @@ -511,7 +520,6 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) struct ionic_cq *cq = napi_to_cq(napi); struct ionic_dev *idev; struct ionic_lif *lif; - u16 rx_fill_threshold; u32 work_done = 0; u32 flags = 0; @@ -521,10 +529,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) work_done = ionic_cq_service(cq, budget, ionic_rx_service, NULL, NULL); - rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD, - cq->num_descs / IONIC_RX_FILL_DIV); - if (work_done && ionic_q_space_avail(cq->bound_q) >= rx_fill_threshold) - ionic_rx_fill(cq->bound_q); + ionic_rx_fill(cq->bound_q); if (work_done < budget && napi_complete_done(napi, work_done)) { ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); @@ -550,7 +555,6 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) struct ionic_dev *idev; struct ionic_lif *lif; struct ionic_cq *txcq; - u16 rx_fill_threshold; u32 rx_work_done = 0; u32 tx_work_done = 0; u32 flags = 0; @@ -565,10 +569,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) rx_work_done = ionic_cq_service(rxcq, budget, ionic_rx_service, NULL, NULL); - rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD, - rxcq->num_descs / IONIC_RX_FILL_DIV); - if (rx_work_done && ionic_q_space_avail(rxcq->bound_q) >= rx_fill_threshold) - ionic_rx_fill(rxcq->bound_q); + ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { ionic_dim_update(qcq, 0); From 07154f94c213d59f73ef7ca2e7f60379714816a9 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 2 Feb 2023 13:55:37 -0800 Subject: [PATCH 031/118] ionic: missed doorbell workaround [ Upstream commit b69585bfceceeffda940906cabfdaee4b47bde92 ] In one version of the HW there is a remote possibility that it will miss the doorbell ring. This adds a bit of protection to be sure we don't stall a queue from a missed doorbell. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Allen Hubbe Signed-off-by: Shannon Nelson Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/pensando/ionic/ionic_dev.c | 9 +- .../net/ethernet/pensando/ionic/ionic_dev.h | 12 +++ .../net/ethernet/pensando/ionic/ionic_lif.c | 41 ++++++++- .../net/ethernet/pensando/ionic/ionic_lif.h | 2 + .../net/ethernet/pensando/ionic/ionic_main.c | 29 +++++++ .../net/ethernet/pensando/ionic/ionic_txrx.c | 87 ++++++++++++++++++- 6 files changed, 176 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 9d0514cfeb5c..344a3924627d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -694,9 +694,16 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb, q->lif->index, q->name, q->hw_type, q->hw_index, q->head_idx, ring_doorbell); - if (ring_doorbell) + if (ring_doorbell) { ionic_dbell_ring(lif->kern_dbpage, q->hw_type, q->dbval | q->head_idx); + + q->dbell_jiffies = jiffies; + + if (q_to_qcq(q)->napi_qcq) + mod_timer(&q_to_qcq(q)->napi_qcq->napi_deadline, + jiffies + IONIC_NAPI_DEADLINE); + } } static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 563c302eb033..ad8a2a4453b7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -25,6 +25,12 @@ #define IONIC_DEV_INFO_REG_COUNT 32 #define IONIC_DEV_CMD_REG_COUNT 32 +#define IONIC_NAPI_DEADLINE (HZ / 200) /* 5ms */ +#define IONIC_ADMIN_DOORBELL_DEADLINE (HZ / 2) /* 500ms */ +#define IONIC_TX_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ +#define IONIC_RX_MIN_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ +#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 5) /* 5s */ + struct ionic_dev_bar { void __iomem *vaddr; phys_addr_t bus_addr; @@ -214,6 +220,8 @@ struct ionic_queue { struct ionic_lif *lif; struct ionic_desc_info *info; u64 dbval; + unsigned long dbell_deadline; + unsigned long dbell_jiffies; u16 head_idx; u16 tail_idx; unsigned int index; @@ -358,4 +366,8 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, int ionic_heartbeat_check(struct ionic *ionic); bool ionic_is_fw_running(struct ionic_dev *idev); +bool ionic_adminq_poke_doorbell(struct ionic_queue *q); +bool ionic_txq_poke_doorbell(struct ionic_queue *q); +bool ionic_rxq_poke_doorbell(struct ionic_queue *q); + #endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 147e23435c3d..159bfcc76498 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -16,6 +16,7 @@ #include "ionic.h" #include "ionic_bus.h" +#include "ionic_dev.h" #include "ionic_lif.h" #include "ionic_txrx.h" #include "ionic_ethtool.h" @@ -200,6 +201,13 @@ void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep) } } +static void ionic_napi_deadline(struct timer_list *timer) +{ + struct ionic_qcq *qcq = container_of(timer, struct ionic_qcq, napi_deadline); + + napi_schedule(&qcq->napi); +} + static irqreturn_t ionic_isr(int irq, void *data) { struct napi_struct *napi = data; @@ -325,6 +333,7 @@ static int ionic_qcq_disable(struct ionic_lif *lif, struct ionic_qcq *qcq, int f synchronize_irq(qcq->intr.vector); irq_set_affinity_hint(qcq->intr.vector, NULL); napi_disable(&qcq->napi); + del_timer_sync(&qcq->napi_deadline); } /* If there was a previous fw communcation error, don't bother with @@ -460,6 +469,7 @@ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, n_qcq->intr.vector = src_qcq->intr.vector; n_qcq->intr.index = src_qcq->intr.index; + n_qcq->napi_qcq = src_qcq->napi_qcq; } static int ionic_alloc_qcq_interrupt(struct ionic_lif *lif, struct ionic_qcq *qcq) @@ -782,8 +792,14 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) dev_dbg(dev, "txq->hw_type %d\n", q->hw_type); dev_dbg(dev, "txq->hw_index %d\n", q->hw_index); - if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) + q->dbell_deadline = IONIC_TX_DOORBELL_DEADLINE; + q->dbell_jiffies = jiffies; + + if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) { netif_napi_add(lif->netdev, &qcq->napi, ionic_tx_napi); + qcq->napi_qcq = qcq; + timer_setup(&qcq->napi_deadline, ionic_napi_deadline, 0); + } qcq->flags |= IONIC_QCQ_F_INITED; @@ -837,11 +853,17 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) dev_dbg(dev, "rxq->hw_type %d\n", q->hw_type); dev_dbg(dev, "rxq->hw_index %d\n", q->hw_index); + q->dbell_deadline = IONIC_RX_MIN_DOORBELL_DEADLINE; + q->dbell_jiffies = jiffies; + if (test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state)) netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi); else netif_napi_add(lif->netdev, &qcq->napi, ionic_txrx_napi); + qcq->napi_qcq = qcq; + timer_setup(&qcq->napi_deadline, ionic_napi_deadline, 0); + qcq->flags |= IONIC_QCQ_F_INITED; return 0; @@ -1159,6 +1181,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) struct ionic_dev *idev = &lif->ionic->idev; unsigned long irqflags; unsigned int flags = 0; + bool resched = false; int rx_work = 0; int tx_work = 0; int n_work = 0; @@ -1196,6 +1219,16 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) ionic_intr_credits(idev->intr_ctrl, intr->index, credits, flags); } + if (!a_work && ionic_adminq_poke_doorbell(&lif->adminqcq->q)) + resched = true; + if (lif->hwstamp_rxq && !rx_work && ionic_rxq_poke_doorbell(&lif->hwstamp_rxq->q)) + resched = true; + if (lif->hwstamp_txq && !tx_work && ionic_txq_poke_doorbell(&lif->hwstamp_txq->q)) + resched = true; + if (resched) + mod_timer(&lif->adminqcq->napi_deadline, + jiffies + IONIC_NAPI_DEADLINE); + return work_done; } @@ -3175,8 +3208,14 @@ static int ionic_lif_adminq_init(struct ionic_lif *lif) dev_dbg(dev, "adminq->hw_type %d\n", q->hw_type); dev_dbg(dev, "adminq->hw_index %d\n", q->hw_index); + q->dbell_deadline = IONIC_ADMIN_DOORBELL_DEADLINE; + q->dbell_jiffies = jiffies; + netif_napi_add(lif->netdev, &qcq->napi, ionic_adminq_napi); + qcq->napi_qcq = qcq; + timer_setup(&qcq->napi_deadline, ionic_napi_deadline, 0); + napi_enable(&qcq->napi); if (qcq->flags & IONIC_QCQ_F_INTR) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index a53984bf3544..734519895614 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -74,8 +74,10 @@ struct ionic_qcq { struct ionic_queue q; struct ionic_cq cq; struct ionic_intr_info intr; + struct timer_list napi_deadline; struct napi_struct napi; unsigned int flags; + struct ionic_qcq *napi_qcq; struct dentry *dentry; }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 5456c2b15d9b..79272f5f380c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -289,6 +289,35 @@ static void ionic_adminq_cb(struct ionic_queue *q, complete_all(&ctx->work); } +bool ionic_adminq_poke_doorbell(struct ionic_queue *q) +{ + struct ionic_lif *lif = q->lif; + unsigned long now, then, dif; + unsigned long irqflags; + + spin_lock_irqsave(&lif->adminq_lock, irqflags); + + if (q->tail_idx == q->head_idx) { + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); + return false; + } + + now = READ_ONCE(jiffies); + then = q->dbell_jiffies; + dif = now - then; + + if (dif > q->dbell_deadline) { + ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type, + q->dbval | q->head_idx); + + q->dbell_jiffies = now; + } + + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); + + return true; +} + int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { struct ionic_desc_info *desc_info; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 7977de4d67b7..f8f5eb130768 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -22,6 +22,67 @@ static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell, ionic_q_post(q, ring_dbell, cb_func, cb_arg); } +bool ionic_txq_poke_doorbell(struct ionic_queue *q) +{ + unsigned long now, then, dif; + struct netdev_queue *netdev_txq; + struct net_device *netdev; + + netdev = q->lif->netdev; + netdev_txq = netdev_get_tx_queue(netdev, q->index); + + HARD_TX_LOCK(netdev, netdev_txq, smp_processor_id()); + + if (q->tail_idx == q->head_idx) { + HARD_TX_UNLOCK(netdev, netdev_txq); + return false; + } + + now = READ_ONCE(jiffies); + then = q->dbell_jiffies; + dif = now - then; + + if (dif > q->dbell_deadline) { + ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type, + q->dbval | q->head_idx); + + q->dbell_jiffies = now; + } + + HARD_TX_UNLOCK(netdev, netdev_txq); + + return true; +} + +bool ionic_rxq_poke_doorbell(struct ionic_queue *q) +{ + unsigned long now, then, dif; + + /* no lock, called from rx napi or txrx napi, nothing else can fill */ + + if (q->tail_idx == q->head_idx) + return false; + + now = READ_ONCE(jiffies); + then = q->dbell_jiffies; + dif = now - then; + + if (dif > q->dbell_deadline) { + ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type, + q->dbval | q->head_idx); + + q->dbell_jiffies = now; + + dif = 2 * q->dbell_deadline; + if (dif > IONIC_RX_MAX_DOORBELL_DEADLINE) + dif = IONIC_RX_MAX_DOORBELL_DEADLINE; + + q->dbell_deadline = dif; + } + + return true; +} + static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q) { return netdev_get_tx_queue(q->lif->netdev, q->index); @@ -424,6 +485,12 @@ void ionic_rx_fill(struct ionic_queue *q) ionic_dbell_ring(q->lif->kern_dbpage, q->hw_type, q->dbval | q->head_idx); + + q->dbell_deadline = IONIC_RX_MIN_DOORBELL_DEADLINE; + q->dbell_jiffies = jiffies; + + mod_timer(&q_to_qcq(q)->napi_qcq->napi_deadline, + jiffies + IONIC_NAPI_DEADLINE); } void ionic_rx_empty(struct ionic_queue *q) @@ -511,6 +578,9 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) work_done, flags); } + if (!work_done && ionic_txq_poke_doorbell(&qcq->q)) + mod_timer(&qcq->napi_deadline, jiffies + IONIC_NAPI_DEADLINE); + return work_done; } @@ -544,23 +614,29 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) work_done, flags); } + if (!work_done && ionic_rxq_poke_doorbell(&qcq->q)) + mod_timer(&qcq->napi_deadline, jiffies + IONIC_NAPI_DEADLINE); + return work_done; } int ionic_txrx_napi(struct napi_struct *napi, int budget) { - struct ionic_qcq *qcq = napi_to_qcq(napi); + struct ionic_qcq *rxqcq = napi_to_qcq(napi); struct ionic_cq *rxcq = napi_to_cq(napi); unsigned int qi = rxcq->bound_q->index; + struct ionic_qcq *txqcq; struct ionic_dev *idev; struct ionic_lif *lif; struct ionic_cq *txcq; + bool resched = false; u32 rx_work_done = 0; u32 tx_work_done = 0; u32 flags = 0; lif = rxcq->bound_q->lif; idev = &lif->ionic->idev; + txqcq = lif->txqcqs[qi]; txcq = &lif->txqcqs[qi]->cq; tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, @@ -572,7 +648,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { - ionic_dim_update(qcq, 0); + ionic_dim_update(rxqcq, 0); flags |= IONIC_INTR_CRED_UNMASK; rxcq->bound_intr->rearm_count++; } @@ -583,6 +659,13 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) tx_work_done + rx_work_done, flags); } + if (!rx_work_done && ionic_rxq_poke_doorbell(&rxqcq->q)) + resched = true; + if (!tx_work_done && ionic_txq_poke_doorbell(&txqcq->q)) + resched = true; + if (resched) + mod_timer(&rxqcq->napi_deadline, jiffies + IONIC_NAPI_DEADLINE); + return rx_work_done; } From c97c7c3103a3ebe30ae215a2f5fee1a45430b973 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 2 Feb 2023 14:00:23 -0800 Subject: [PATCH 032/118] cpufreq: qcom-hw: Fix cpufreq_driver->get() for non-LMH systems [ Upstream commit 51be2fffd65d9f9cb427030ab0ee85d791b4437d ] On a sc7180-based Chromebook, when I go to /sys/devices/system/cpu/cpu0/cpufreq I can see: cpuinfo_cur_freq:2995200 cpuinfo_max_freq:1804800 scaling_available_frequencies:300000 576000 ... 1708800 1804800 scaling_cur_freq:1804800 scaling_max_freq:1804800 As you can see the `cpuinfo_cur_freq` is bogus. It turns out that this bogus info started showing up as of commit c72cf0cb1d77 ("cpufreq: qcom-hw: Fix the frequency returned by cpufreq_driver->get()"). That commit seems to assume that everyone is on the LMH bandwagon, but sc7180 isn't. Let's go back to the old code in the case where LMH isn't used. Fixes: c72cf0cb1d77 ("cpufreq: qcom-hw: Fix the frequency returned by cpufreq_driver->get()") Signed-off-by: Douglas Anderson Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam [ Viresh: Fixed the 'fixes' tag ] Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/qcom-cpufreq-hw.c | 32 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 3c623a0bc147..d10bf7635a0d 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -137,21 +137,6 @@ static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) return lval * xo_rate; } -/* Get the current frequency of the CPU (after throttling) */ -static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) -{ - struct qcom_cpufreq_data *data; - struct cpufreq_policy *policy; - - policy = cpufreq_cpu_get_raw(cpu); - if (!policy) - return 0; - - data = policy->driver_data; - - return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; -} - /* Get the frequency requested by the cpufreq core for the CPU */ static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) { @@ -173,6 +158,23 @@ static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) return policy->freq_table[index].frequency; } +static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) +{ + struct qcom_cpufreq_data *data; + struct cpufreq_policy *policy; + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) + return 0; + + data = policy->driver_data; + + if (data->throttle_irq >= 0) + return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; + + return qcom_cpufreq_get_freq(cpu); +} + static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { From 89c0c69fc7398f3ea3bc900a7ae3cab0c8c97f4e Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Fri, 3 Feb 2023 13:04:48 -0300 Subject: [PATCH 033/118] uapi: add missing ip/ipv6 header dependencies for linux/stddef.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 03702d4d29be4e2510ec80b248dbbde4e57030d9 ] Since commit 58e0be1ef6118 ("net: use struct_group to copy ip/ipv6 header addresses"), ip and ipv6 headers started to use the __struct_group definition, which is defined at include/uapi/linux/stddef.h. However, linux/stddef.h isn't explicitly included in include/uapi/linux/{ip,ipv6}.h, which breaks build of xskxceiver bpf selftest if you install the uapi headers in the system: $ make V=1 xskxceiver -C tools/testing/selftests/bpf ... make: Entering directory '(...)/tools/testing/selftests/bpf' gcc -g -O0 -rdynamic -Wall -Werror (...) In file included from xskxceiver.c:79: /usr/include/linux/ip.h:103:9: error: expected specifier-qualifier-list before ‘__struct_group’ 103 | __struct_group(/* no tag */, addrs, /* no attrs */, | ^~~~~~~~~~~~~~ ... Include the missing dependency in ip.h and do the same for the ipv6.h header. Fixes: 58e0be1ef611 ("net: use struct_group to copy ip/ipv6 header addresses") Signed-off-by: Herton R. Krzesinski Reviewed-by: Carlos O'Donell Tested-by: Carlos O'Donell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/uapi/linux/ip.h | 1 + include/uapi/linux/ipv6.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 874a92349bf5..283dec7e3645 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -18,6 +18,7 @@ #ifndef _UAPI_LINUX_IP_H #define _UAPI_LINUX_IP_H #include +#include #include #define IPTOS_TOS_MASK 0x1E diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 81f4243bebb1..53326dfc59ec 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -4,6 +4,7 @@ #include #include +#include #include #include From c0eb3c0cdd1bb6501ad72521b13d3b3316e038e9 Mon Sep 17 00:00:00 2001 From: Casper Andersson Date: Fri, 3 Feb 2023 09:55:57 +0100 Subject: [PATCH 034/118] net: microchip: sparx5: fix PTP init/deinit not checking all ports [ Upstream commit d7d94b2612f5dc25d61dc7bf58aafe7b31f40191 ] Check all ports instead of just port_count ports. PTP init was only checking ports 0 to port_count. If the hardware ports are not mapped starting from 0 then they would be missed, e.g. if only ports 20-30 were mapped it would attempt to init ports 0-10, resulting in NULL pointers when attempting to timestamp. Now it will init all mapped ports. Fixes: 70dfe25cd866 ("net: sparx5: Update extraction/injection for timestamping") Signed-off-by: Casper Andersson Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c index 0ed1ea7727c5..69e76634f9aa 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c @@ -633,7 +633,7 @@ int sparx5_ptp_init(struct sparx5 *sparx5) /* Enable master counters */ spx5_wr(PTP_PTP_DOM_CFG_PTP_ENA_SET(0x7), sparx5, PTP_PTP_DOM_CFG); - for (i = 0; i < sparx5->port_count; i++) { + for (i = 0; i < SPX5_PORTS; i++) { port = sparx5->ports[i]; if (!port) continue; @@ -649,7 +649,7 @@ void sparx5_ptp_deinit(struct sparx5 *sparx5) struct sparx5_port *port; int i; - for (i = 0; i < sparx5->port_count; i++) { + for (i = 0; i < SPX5_PORTS; i++) { port = sparx5->ports[i]; if (!port) continue; From 16da5f6d38ac35a2b598df5acf45b03c2eb1211a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 3 Feb 2023 16:08:49 -0600 Subject: [PATCH 035/118] HID: amd_sfh: if no sensors are enabled, clean up [ Upstream commit 7bcfdab3f0c6672ca52be3cb65a0550d8b99554b ] It was reported that commit b300667b33b2 ("HID: amd_sfh: Disable the interrupt for all command") had caused increased resume time on HP Envy x360. Before this commit 3 sensors were reported, but they were not actually functional. After this commit the sensors are no longer reported, but also the resume time increased. To avoid this problem explicitly look for the number of disabled sensors. If all the sensors are disabled, clean everything up. Fixes: b300667b33b2 ("HID: amd_sfh: Disable the interrupt for all command") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2115 Reported-by: Xaver Hugl Signed-off-by: Mario Limonciello Acked-by: Basavaraj Natikar Link: https://lore.kernel.org/r/20230203220850.13924-1-mario.limonciello@amd.com Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 13 +++++++++++-- drivers/hid/amd-sfh-hid/amd_sfh_hid.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 1fb0f7105fb2..c751d12f5df8 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -227,6 +227,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->num_hid_devices = amd_mp2_get_sensor_num(privdata, &cl_data->sensor_idx[0]); if (cl_data->num_hid_devices == 0) return -ENODEV; + cl_data->is_any_sensor_enabled = false; INIT_DELAYED_WORK(&cl_data->work, amd_sfh_work); INIT_DELAYED_WORK(&cl_data->work_buffer, amd_sfh_work_buffer); @@ -287,6 +288,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) status = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); if (status == SENSOR_ENABLED) { + cl_data->is_any_sensor_enabled = true; cl_data->sensor_sts[i] = SENSOR_ENABLED; rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data); if (rc) { @@ -301,19 +303,26 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_sts[i]); goto cleanup; } + } else { + cl_data->sensor_sts[i] = SENSOR_DISABLED; + dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n", + cl_data->sensor_idx[i], + get_sensor_name(cl_data->sensor_idx[i]), + cl_data->sensor_sts[i]); } dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n", cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]), cl_data->sensor_sts[i]); } - if (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0) { + if (!cl_data->is_any_sensor_enabled || + (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { amd_sfh_hid_client_deinit(privdata); for (i = 0; i < cl_data->num_hid_devices; i++) { devm_kfree(dev, cl_data->feature_report[i]); devm_kfree(dev, in_data->input_report[i]); devm_kfree(dev, cl_data->report_descr[i]); } - dev_warn(dev, "Failed to discover, sensors not enabled\n"); + dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); return -EOPNOTSUPP; } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h index 3754fb423e3a..528036892c9d 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h @@ -32,6 +32,7 @@ struct amd_input_data { struct amdtp_cl_data { u8 init_done; u32 cur_hid_dev; + bool is_any_sensor_enabled; u32 hid_dev_count; u32 num_hid_devices; struct device_info *hid_devices; From d05b72328810897a28fe313735481789edfef7e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Jan 2023 02:21:24 +0200 Subject: [PATCH 036/118] drm/i915: Don't do the WM0->WM1 copy w/a if WM1 is already enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 90d5e8301ac24550be80d193aa5582cab56c29fc ] Due to a workaround we have to make sure the WM1 watermarks block/lines values are sensible even when WM1 is disabled. To that end we copy those values from WM0. However since we now keep each wm level enabled on a per-plane basis it doesn't seem necessary to do that copy when we already have an enabled WM1 on the current plane. That is, we might be in a situation where another plane can only do WM0 (and thus needs the copy) but the current plane's WM1 is still perfectly valid (ie. fits into the current DDB allocation). Skipping the copy could avoid reprogramming the plane's registers needlessly in some cases. Fixes: a301cb0fca2d ("drm/i915: Keep plane watermarks enabled more aggressively") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230131002127.29305-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit c580c2d27ac8754cc6f01da1d715b7272f5f9cbb) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/skl_watermark.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 18178b01375e..a7adf02476f6 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -1587,7 +1587,8 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, skl_check_wm_level(&wm->wm[level], ddb); if (icl_need_wm1_wa(i915, plane_id) && - level == 1 && wm->wm[0].enable) { + level == 1 && !wm->wm[level].enable && + wm->wm[0].enable) { wm->wm[level].blocks = wm->wm[0].blocks; wm->wm[level].lines = wm->wm[0].lines; wm->wm[level].ignore_lines = wm->wm[0].ignore_lines; From 5489930113dc9dbdd12c947591e981bb0afa1d59 Mon Sep 17 00:00:00 2001 From: Ryan Neph Date: Fri, 3 Feb 2023 15:33:44 -0800 Subject: [PATCH 037/118] drm/virtio: exbuf->fence_fd unmodified on interrupted wait [ Upstream commit 8f20660f053cefd4693e69cfff9cf58f4f7c4929 ] An interrupted dma_fence_wait() becomes an -ERESTARTSYS returned to userspace ioctl(DRM_IOCTL_VIRTGPU_EXECBUFFER) calls, prompting to retry the ioctl(), but the passed exbuf->fence_fd has been reset to -1, making the retry attempt fail at sync_file_get_fence(). The uapi for DRM_IOCTL_VIRTGPU_EXECBUFFER is changed to retain the passed value for exbuf->fence_fd when returning anything besides a successful result from the ioctl. Fixes: 2cd7b6f08bc4 ("drm/virtio: add in/out fence support for explicit synchronization") Signed-off-by: Ryan Neph Reviewed-by: Rob Clark Reviewed-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20230203233345.2477767-1-ryanneph@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 +---- include/uapi/drm/virtgpu_drm.h | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 9f4a90493aea..da45215a933d 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -126,7 +126,6 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, void __user *user_bo_handles = NULL; struct virtio_gpu_object_array *buflist = NULL; struct sync_file *sync_file; - int in_fence_fd = exbuf->fence_fd; int out_fence_fd = -1; void *buf; uint64_t fence_ctx; @@ -152,13 +151,11 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, ring_idx = exbuf->ring_idx; } - exbuf->fence_fd = -1; - virtio_gpu_create_context(dev, file); if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_IN) { struct dma_fence *in_fence; - in_fence = sync_file_get_fence(in_fence_fd); + in_fence = sync_file_get_fence(exbuf->fence_fd); if (!in_fence) return -EINVAL; diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 0512fde5e697..7b158fcb02b4 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -64,6 +64,7 @@ struct drm_virtgpu_map { __u32 pad; }; +/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */ struct drm_virtgpu_execbuffer { __u32 flags; __u32 size; From c47c2b173d0cd027897fbeb3b80b840d2c775f6e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 31 Jan 2023 22:17:19 +0000 Subject: [PATCH 038/118] cpuset: Call set_cpus_allowed_ptr() with appropriate mask for task [ Upstream commit 7a2127e66a00e073db8d90f9aac308f4a8a64226 ] set_cpus_allowed_ptr() will fail with -EINVAL if the requested affinity mask is not a subset of the task_cpu_possible_mask() for the task being updated. Consequently, on a heterogeneous system with cpusets spanning the different CPU types, updates to the cgroup hierarchy can silently fail to update task affinities when the effective affinity mask for the cpuset is expanded. For example, consider an arm64 system with 4 CPUs, where CPUs 2-3 are the only cores capable of executing 32-bit tasks. Attaching a 32-bit task to a cpuset containing CPUs 0-2 will correctly affine the task to CPU 2. Extending the cpuset to CPUs 0-3, however, will fail to extend the affinity mask of the 32-bit task because update_tasks_cpumask() will pass the full 0-3 mask to set_cpus_allowed_ptr(). Extend update_tasks_cpumask() to take a temporary 'cpumask' paramater and use it to mask the 'effective_cpus' mask with the possible mask for each task being updated. Fixes: 431c69fac05b ("cpuset: Honour task_cpu_possible_mask() in guarantee_online_cpus()") Signed-off-by: Will Deacon Acked-by: Waiman Long Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cpuset.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a753adcbc7c7..fac426036620 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1201,12 +1201,13 @@ void rebuild_sched_domains(void) /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed + * @new_cpus: the temp variable for the new effective_cpus mask * * Iterate through each task of @cs updating its cpus_allowed to the * effective cpuset's. As this function is called with cpuset_rwsem held, * cpuset membership stays stable. */ -static void update_tasks_cpumask(struct cpuset *cs) +static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) { struct css_task_iter it; struct task_struct *task; @@ -1220,7 +1221,10 @@ static void update_tasks_cpumask(struct cpuset *cs) if (top_cs && (task->flags & PF_KTHREAD) && kthread_is_per_cpu(task)) continue; - set_cpus_allowed_ptr(task, cs->effective_cpus); + + cpumask_and(new_cpus, cs->effective_cpus, + task_cpu_possible_mask(task)); + set_cpus_allowed_ptr(task, new_cpus); } css_task_iter_end(&it); } @@ -1505,7 +1509,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, spin_unlock_irq(&callback_lock); if (adding || deleting) - update_tasks_cpumask(parent); + update_tasks_cpumask(parent, tmp->new_cpus); /* * Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary. @@ -1657,7 +1661,7 @@ update_parent_subparts: WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); - update_tasks_cpumask(cp); + update_tasks_cpumask(cp, tmp->new_cpus); /* * On legacy hierarchy, if the effective cpumask of any non- @@ -2305,7 +2309,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) } } - update_tasks_cpumask(parent); + update_tasks_cpumask(parent, tmpmask.new_cpus); if (parent->child_ecpus_count) update_sibling_cpumasks(parent, cs, &tmpmask); @@ -3318,7 +3322,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, * as the tasks will be migrated to an ancestor. */ if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) - update_tasks_cpumask(cs); + update_tasks_cpumask(cs, new_cpus); if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs); @@ -3355,7 +3359,7 @@ hotplug_update_tasks(struct cpuset *cs, spin_unlock_irq(&callback_lock); if (cpus_updated) - update_tasks_cpumask(cs); + update_tasks_cpumask(cs, new_cpus); if (mems_updated) update_tasks_nodemask(cs); } From 437f8892ece746821f824e29f826e610a596054f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 6 Feb 2023 07:05:28 +1000 Subject: [PATCH 039/118] nvidiafb: detect the hardware support before removing console. [ Upstream commit 04119ab1a49fc41cb70f0472be5455af268fa260 ] This driver removed the console, but hasn't yet decided if it could take over the console yet. Instead of doing that, probe the hw for support and then remove the console afterwards. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216859 Fixes: 145eed48de27 ("fbdev: Remove conflicting devices on PCI bus") Reported-by: Zeno Davatz Tested-by: Zeno Davatz Tested-by: Thomas Zimmermann Reviewed-by: Thomas Zimmermann Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230205210751.3842103-1-airlied@gmail.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/nvidia/nvidia.c | 81 +++++++++++++++-------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c index 329e2e8133c6..a6c3bc222246 100644 --- a/drivers/video/fbdev/nvidia/nvidia.c +++ b/drivers/video/fbdev/nvidia/nvidia.c @@ -1197,17 +1197,17 @@ static int nvidia_set_fbinfo(struct fb_info *info) return nvidiafb_check_var(&info->var, info); } -static u32 nvidia_get_chipset(struct fb_info *info) +static u32 nvidia_get_chipset(struct pci_dev *pci_dev, + volatile u32 __iomem *REGS) { - struct nvidia_par *par = info->par; - u32 id = (par->pci_dev->vendor << 16) | par->pci_dev->device; + u32 id = (pci_dev->vendor << 16) | pci_dev->device; printk(KERN_INFO PFX "Device ID: %x \n", id); if ((id & 0xfff0) == 0x00f0 || (id & 0xfff0) == 0x02e0) { /* pci-e */ - id = NV_RD32(par->REGS, 0x1800); + id = NV_RD32(REGS, 0x1800); if ((id & 0x0000ffff) == 0x000010DE) id = 0x10DE0000 | (id >> 16); @@ -1220,12 +1220,11 @@ static u32 nvidia_get_chipset(struct fb_info *info) return id; } -static u32 nvidia_get_arch(struct fb_info *info) +static u32 nvidia_get_arch(u32 Chipset) { - struct nvidia_par *par = info->par; u32 arch = 0; - switch (par->Chipset & 0x0ff0) { + switch (Chipset & 0x0ff0) { case 0x0100: /* GeForce 256 */ case 0x0110: /* GeForce2 MX */ case 0x0150: /* GeForce2 */ @@ -1278,16 +1277,44 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) struct fb_info *info; unsigned short cmd; int ret; + volatile u32 __iomem *REGS; + int Chipset; + u32 Architecture; NVTRACE_ENTER(); assert(pd != NULL); + if (pci_enable_device(pd)) { + printk(KERN_ERR PFX "cannot enable PCI device\n"); + return -ENODEV; + } + + /* enable IO and mem if not already done */ + pci_read_config_word(pd, PCI_COMMAND, &cmd); + cmd |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(pd, PCI_COMMAND, cmd); + + nvidiafb_fix.mmio_start = pci_resource_start(pd, 0); + nvidiafb_fix.mmio_len = pci_resource_len(pd, 0); + + REGS = ioremap(nvidiafb_fix.mmio_start, nvidiafb_fix.mmio_len); + if (!REGS) { + printk(KERN_ERR PFX "cannot ioremap MMIO base\n"); + return -ENODEV; + } + + Chipset = nvidia_get_chipset(pd, REGS); + Architecture = nvidia_get_arch(Chipset); + if (Architecture == 0) { + printk(KERN_ERR PFX "unknown NV_ARCH\n"); + goto err_out; + } + ret = aperture_remove_conflicting_pci_devices(pd, "nvidiafb"); if (ret) - return ret; + goto err_out; info = framebuffer_alloc(sizeof(struct nvidia_par), &pd->dev); - if (!info) goto err_out; @@ -1298,11 +1325,6 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) if (info->pixmap.addr == NULL) goto err_out_kfree; - if (pci_enable_device(pd)) { - printk(KERN_ERR PFX "cannot enable PCI device\n"); - goto err_out_enable; - } - if (pci_request_regions(pd, "nvidiafb")) { printk(KERN_ERR PFX "cannot request PCI regions\n"); goto err_out_enable; @@ -1318,34 +1340,17 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) par->paneltweak = paneltweak; par->reverse_i2c = reverse_i2c; - /* enable IO and mem if not already done */ - pci_read_config_word(pd, PCI_COMMAND, &cmd); - cmd |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY); - pci_write_config_word(pd, PCI_COMMAND, cmd); - - nvidiafb_fix.mmio_start = pci_resource_start(pd, 0); nvidiafb_fix.smem_start = pci_resource_start(pd, 1); - nvidiafb_fix.mmio_len = pci_resource_len(pd, 0); - par->REGS = ioremap(nvidiafb_fix.mmio_start, nvidiafb_fix.mmio_len); + par->REGS = REGS; - if (!par->REGS) { - printk(KERN_ERR PFX "cannot ioremap MMIO base\n"); - goto err_out_free_base0; - } - - par->Chipset = nvidia_get_chipset(info); - par->Architecture = nvidia_get_arch(info); - - if (par->Architecture == 0) { - printk(KERN_ERR PFX "unknown NV_ARCH\n"); - goto err_out_arch; - } + par->Chipset = Chipset; + par->Architecture = Architecture; sprintf(nvidiafb_fix.id, "NV%x", (pd->device & 0x0ff0) >> 4); if (NVCommonSetup(info)) - goto err_out_arch; + goto err_out_free_base0; par->FbAddress = nvidiafb_fix.smem_start; par->FbMapSize = par->RamAmountKBytes * 1024; @@ -1401,7 +1406,6 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) goto err_out_iounmap_fb; } - printk(KERN_INFO PFX "PCI nVidia %s framebuffer (%dMB @ 0x%lX)\n", info->fix.id, @@ -1415,15 +1419,14 @@ err_out_iounmap_fb: err_out_free_base1: fb_destroy_modedb(info->monspecs.modedb); nvidia_delete_i2c_busses(par); -err_out_arch: - iounmap(par->REGS); - err_out_free_base0: +err_out_free_base0: pci_release_regions(pd); err_out_enable: kfree(info->pixmap.addr); err_out_kfree: framebuffer_release(info); err_out: + iounmap(REGS); return -ENODEV; } From df59e05401450973c8c7e96fd74b49e24442dc1f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Mon, 30 Jan 2023 14:06:40 -0800 Subject: [PATCH 040/118] ice: Do not use WQ_MEM_RECLAIM flag for workqueue [ Upstream commit 4d159f7884f78b1aacb99b4fc37d1e3cb1194e39 ] When both ice and the irdma driver are loaded, a warning in check_flush_dependency is being triggered. This is due to ice driver workqueue being allocated with the WQ_MEM_RECLAIM flag and the irdma one is not. According to kernel documentation, this flag should be set if the workqueue will be involved in the kernel's memory reclamation flow. Since it is not, there is no need for the ice driver's WQ to have this flag set so remove it. Example trace: [ +0.000004] workqueue: WQ_MEM_RECLAIM ice:ice_service_task [ice] is flushing !WQ_MEM_RECLAIM infiniband:0x0 [ +0.000139] WARNING: CPU: 0 PID: 728 at kernel/workqueue.c:2632 check_flush_dependency+0x178/0x1a0 [ +0.000011] Modules linked in: bonding tls xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_cha in_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink bridge stp llc rfkill vfat fat intel_rapl_msr intel _rapl_common isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct1 0dif_pclmul crc32_pclmul ghash_clmulni_intel rapl intel_cstate rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_ core_mod ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_cm iw_cm iTCO_wdt iTCO_vendor_support ipmi_ssif irdma mei_me ib_uverbs ib_core intel_uncore joydev pcspkr i2c_i801 acpi_ipmi mei lpc_ich i2c_smbus intel_pch_thermal ioatdma ipmi_si acpi_power_meter acpi_pad xfs libcrc32c sd_mod t10_pi crc64_rocksoft crc64 sg ahci ixgbe libahci ice i40e igb crc32c_intel mdio i2c_algo_bit liba ta dca wmi dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse [ +0.000161] [last unloaded: bonding] [ +0.000006] CPU: 0 PID: 728 Comm: kworker/0:2 Tainted: G S 6.2.0-rc2_next-queue-13jan-00458-gc20aabd57164 #1 [ +0.000006] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0010.010620200716 01/06/2020 [ +0.000003] Workqueue: ice ice_service_task [ice] [ +0.000127] RIP: 0010:check_flush_dependency+0x178/0x1a0 [ +0.000005] Code: 89 8e 02 01 e8 49 3d 40 00 49 8b 55 18 48 8d 8d d0 00 00 00 48 8d b3 d0 00 00 00 4d 89 e0 48 c7 c7 e0 3b 08 9f e8 bb d3 07 01 <0f> 0b e9 be fe ff ff 80 3d 24 89 8e 02 00 0f 85 6b ff ff ff e9 06 [ +0.000004] RSP: 0018:ffff88810a39f990 EFLAGS: 00010282 [ +0.000005] RAX: 0000000000000000 RBX: ffff888141bc2400 RCX: 0000000000000000 [ +0.000004] RDX: 0000000000000001 RSI: dffffc0000000000 RDI: ffffffffa1213a80 [ +0.000003] RBP: ffff888194bf3400 R08: ffffed117b306112 R09: ffffed117b306112 [ +0.000003] R10: ffff888bd983088b R11: ffffed117b306111 R12: 0000000000000000 [ +0.000003] R13: ffff888111f84d00 R14: ffff88810a3943ac R15: ffff888194bf3400 [ +0.000004] FS: 0000000000000000(0000) GS:ffff888bd9800000(0000) knlGS:0000000000000000 [ +0.000003] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000003] CR2: 000056035b208b60 CR3: 000000017795e005 CR4: 00000000007706f0 [ +0.000003] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000003] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.000002] PKRU: 55555554 [ +0.000003] Call Trace: [ +0.000002] [ +0.000003] __flush_workqueue+0x203/0x840 [ +0.000006] ? mutex_unlock+0x84/0xd0 [ +0.000008] ? __pfx_mutex_unlock+0x10/0x10 [ +0.000004] ? __pfx___flush_workqueue+0x10/0x10 [ +0.000006] ? mutex_lock+0xa3/0xf0 [ +0.000005] ib_cache_cleanup_one+0x39/0x190 [ib_core] [ +0.000174] __ib_unregister_device+0x84/0xf0 [ib_core] [ +0.000094] ib_unregister_device+0x25/0x30 [ib_core] [ +0.000093] irdma_ib_unregister_device+0x97/0xc0 [irdma] [ +0.000064] ? __pfx_irdma_ib_unregister_device+0x10/0x10 [irdma] [ +0.000059] ? up_write+0x5c/0x90 [ +0.000005] irdma_remove+0x36/0x90 [irdma] [ +0.000062] auxiliary_bus_remove+0x32/0x50 [ +0.000007] device_release_driver_internal+0xfa/0x1c0 [ +0.000005] bus_remove_device+0x18a/0x260 [ +0.000007] device_del+0x2e5/0x650 [ +0.000005] ? __pfx_device_del+0x10/0x10 [ +0.000003] ? mutex_unlock+0x84/0xd0 [ +0.000004] ? __pfx_mutex_unlock+0x10/0x10 [ +0.000004] ? _raw_spin_unlock+0x18/0x40 [ +0.000005] ice_unplug_aux_dev+0x52/0x70 [ice] [ +0.000160] ice_service_task+0x1309/0x14f0 [ice] [ +0.000134] ? __pfx___schedule+0x10/0x10 [ +0.000006] process_one_work+0x3b1/0x6c0 [ +0.000008] worker_thread+0x69/0x670 [ +0.000005] ? __kthread_parkme+0xec/0x110 [ +0.000007] ? __pfx_worker_thread+0x10/0x10 [ +0.000005] kthread+0x17f/0x1b0 [ +0.000005] ? __pfx_kthread+0x10/0x10 [ +0.000004] ret_from_fork+0x29/0x50 [ +0.000009] Fixes: 940b61af02f4 ("ice: Initialize PF and setup miscellaneous interrupt") Signed-off-by: Anirudh Venkataramanan Signed-off-by: Marcin Szycik Tested-by: Jakub Andrysiak Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1ac5f0018c7e..333582dabba1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5518,7 +5518,7 @@ static int __init ice_module_init(void) pr_info("%s\n", ice_driver_string); pr_info("%s\n", ice_copyright); - ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME); + ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME); if (!ice_wq) { pr_err("Failed to create workqueue\n"); return -ENOMEM; From 8eeca43d8e7e34c06ff0a2d97f1d7f9a82a70423 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 16 Jan 2023 13:34:58 +0100 Subject: [PATCH 041/118] ice: Fix disabling Rx VLAN filtering with port VLAN enabled [ Upstream commit c793f8ea15e312789b5b6b4a5e7b0b92315be5cb ] If the user turns on the vf-true-promiscuous-support flag, then Rx VLAN filtering will be disabled if the VF requests to enable promiscuous mode. When the VF is in a port VLAN, this is the incorrect behavior because it will allow the VF to receive traffic outside of its port VLAN domain. Fortunately this only resulted in the VF(s) receiving broadcast traffic outside of the VLAN domain because all of the VLAN promiscuous rules are based on the port VLAN ID. Fix this by setting the .disable_rx_filtering VLAN op to a no-op when a port VLAN is enabled on the VF. Also, make sure to make this fix for both Single VLAN Mode and Double VLAN Mode enabled devices. Fixes: c31af68a1b94 ("ice: Add outer_vlan_ops and VSI specific VLAN ops implementations") Signed-off-by: Brett Creeley Signed-off-by: Karen Ostrowska Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- .../net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index 5ecc0ee9a78e..b1ffb81893d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -44,13 +44,17 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) /* outer VLAN ops regardless of port VLAN config */ vlan_ops->add_vlan = ice_vsi_add_vlan; - vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering; vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; if (ice_vf_is_port_vlan_ena(vf)) { /* setup outer VLAN ops */ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; + /* all Rx traffic should be in the domain of the + * assigned port VLAN, so prevent disabling Rx VLAN + * filtering + */ + vlan_ops->dis_rx_filtering = noop_vlan; vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; @@ -63,6 +67,9 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; } else { + vlan_ops->dis_rx_filtering = + ice_vsi_dis_rx_vlan_filtering; + if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) vlan_ops->ena_rx_filtering = noop_vlan; else @@ -96,7 +103,14 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; + /* all Rx traffic should be in the domain of the + * assigned port VLAN, so prevent disabling Rx VLAN + * filtering + */ + vlan_ops->dis_rx_filtering = noop_vlan; } else { + vlan_ops->dis_rx_filtering = + ice_vsi_dis_rx_vlan_filtering; if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) vlan_ops->ena_rx_filtering = noop_vlan; else From 47f4ff6f23f00f5501ff2d7054c1a37c170a7aa0 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 12 Dec 2022 15:11:26 -0800 Subject: [PATCH 042/118] ice: switch: fix potential memleak in ice_add_adv_recipe() [ Upstream commit 4a606ce68426c88ff2563382b33cc34f3485fe57 ] When ice_add_special_words() fails, the 'rm' is not released, which will lead to a memory leak. Fix this up by going to 'err_unroll' label. Compile tested only. Fixes: 8b032a55c1bd ("ice: low level support for tunnels") Signed-off-by: Zhang Changzhong Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 9b762f7972ce..61f844d22512 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -5420,7 +5420,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, */ status = ice_add_special_words(rinfo, lkup_exts, ice_is_dvm_ena(hw)); if (status) - goto err_free_lkup_exts; + goto err_unroll; /* Group match words into recipes using preferred recipe grouping * criteria. From 139c759823b066d8e85c9bce469ec0559fc9018e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 5 Feb 2023 16:07:13 +0200 Subject: [PATCH 043/118] net: dsa: mt7530: don't change PVC_EG_TAG when CPU port becomes VLAN-aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0b6d6425103a676e2b6a81f3fd35d7ea4f9b90ec ] Frank reports that in a mt7530 setup where some ports are standalone and some are in a VLAN-aware bridge, 8021q uppers of the standalone ports lose their VLAN tag on xmit, as seen by the link partner. This seems to occur because once the other ports join the VLAN-aware bridge, mt7530_port_vlan_filtering() also calls mt7530_port_set_vlan_aware(ds, cpu_dp->index), and this affects the way that the switch processes the traffic of the standalone port. Relevant is the PVC_EG_TAG bit. The MT7530 documentation says about it: EG_TAG: Incoming Port Egress Tag VLAN Attribution 0: disabled (system default) 1: consistent (keep the original ingress tag attribute) My interpretation is that this setting applies on the ingress port, and "disabled" is basically the normal behavior, where the egress tag format of the packet (tagged or untagged) is decided by the VLAN table (MT7530_VLAN_EGRESS_UNTAG or MT7530_VLAN_EGRESS_TAG). But there is also an option of overriding the system default behavior, and for the egress tagging format of packets to be decided not by the VLAN table, but simply by copying the ingress tag format (if ingress was tagged, egress is tagged; if ingress was untagged, egress is untagged; aka "consistent). This is useful in 2 scenarios: - VLAN-unaware bridge ports will always encounter a miss in the VLAN table. They should forward a packet as-is, though. So we use "consistent" there. See commit e045124e9399 ("net: dsa: mt7530: fix tagged frames pass-through in VLAN-unaware mode"). - Traffic injected from the CPU port. The operating system is in god mode; if it wants a packet to exit as VLAN-tagged, it sends it as VLAN-tagged. Otherwise it sends it as VLAN-untagged*. *This is true only if we don't consider the bridge TX forwarding offload feature, which mt7530 doesn't support. So for now, make the CPU port always stay in "consistent" mode to allow software VLANs to be forwarded to their egress ports with the VLAN tag intact, and not stripped. Link: https://lore.kernel.org/netdev/trinity-e6294d28-636c-4c40-bb8b-b523521b00be-1674233135062@3c-app-gmx-bs36/ Fixes: e045124e9399 ("net: dsa: mt7530: fix tagged frames pass-through in VLAN-unaware mode") Reported-by: Frank Wunderlich Tested-by: Frank Wunderlich Signed-off-by: Vladimir Oltean Tested-by: Arınç ÜNAL Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230205140713.1609281-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index e74c6b406172..a884f6f6a8c2 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1309,14 +1309,26 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) if (!priv->ports[port].pvid) mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, MT7530_VLAN_ACC_TAGGED); - } - /* Set the port as a user port which is to be able to recognize VID - * from incoming packets before fetching entry within the VLAN table. - */ - mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, - VLAN_ATTR(MT7530_VLAN_USER) | - PVC_EG_TAG(MT7530_VLAN_EG_DISABLED)); + /* Set the port as a user port which is to be able to recognize + * VID from incoming packets before fetching entry within the + * VLAN table. + */ + mt7530_rmw(priv, MT7530_PVC_P(port), + VLAN_ATTR_MASK | PVC_EG_TAG_MASK, + VLAN_ATTR(MT7530_VLAN_USER) | + PVC_EG_TAG(MT7530_VLAN_EG_DISABLED)); + } else { + /* Also set CPU ports to the "user" VLAN port attribute, to + * allow VLAN classification, but keep the EG_TAG attribute as + * "consistent" (i.o.w. don't change its value) for packets + * received by the switch from the CPU, so that tagged packets + * are forwarded to user ports as tagged, and untagged as + * untagged. + */ + mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK, + VLAN_ATTR(MT7530_VLAN_USER)); + } } static void From 4d15289c22fa045ecda0b26a10f2d21bb5912c4f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 5 Feb 2023 21:24:08 +0200 Subject: [PATCH 044/118] net: mscc: ocelot: fix VCAP filters not matching on MAC with "protocol 802.1Q" [ Upstream commit f964f8399df29d3e3ced77177cf35131cd2491bf ] Alternative short title: don't instruct the hardware to match on EtherType with "protocol 802.1Q" flower filters. It doesn't work for the reasons detailed below. With a command such as the following: tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \ protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \ action vlan modify id 300 \ action goto chain $(IS2 0 0) the created filter is set by ocelot_flower_parse_key() to be of type OCELOT_VCAP_KEY_ETYPE, and etype is set to {value=0x8100, mask=0xffff}. This gets propagated all the way to is1_entry_set() which commits it to hardware (the VCAP_IS1_HK_ETYPE field of the key). Compare this to the case where src_mac isn't specified - the key type is OCELOT_VCAP_KEY_ANY, and is1_entry_set() doesn't populate VCAP_IS1_HK_ETYPE. The problem is that for VLAN-tagged frames, the hardware interprets the ETYPE field as holding the encapsulated VLAN protocol. So the above filter will only match those packets which have an encapsulated protocol of 0x8100, rather than all packets with VLAN ID 200 and the given src_mac. The reason why this is allowed to occur is because, although we have a block of code in ocelot_flower_parse_key() which sets "match_protocol" to false when VLAN keys are present, that code executes too late. There is another block of code, which executes for Ethernet addresses, and has a "goto finished_key_parsing" and skips the VLAN header parsing. By skipping it, "match_protocol" remains with the value it was initialized with, i.e. "true", and "proto" is set to f->common.protocol, or 0x8100. The concept of ignoring some keys rather than erroring out when they are present but can't be offloaded is dubious in itself, but is present since the initial commit fe3490e6107e ("net: mscc: ocelot: Hardware ofload for tc flower filter"), and it's outside of the scope of this patch to change that. The problem was introduced when the driver started to interpret the flower filter's protocol, and populate the VCAP filter's ETYPE field based on it. To fix this, it is sufficient to move the code that parses the VLAN keys earlier than the "goto finished_key_parsing" instruction. This will ensure that if we have a flower filter with both VLAN and Ethernet address keys, it won't match on ETYPE 0x8100, because the VLAN key parsing sets "match_protocol = false". Fixes: 86b956de119c ("net: mscc: ocelot: support matching on EtherType") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230205192409.1796428-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot_flower.c | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 7c0897e779dc..ee052404eb55 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -605,6 +605,18 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, flow_rule_match_control(rule, &match); } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->vlan.vid.value = match.key->vlan_id; + filter->vlan.vid.mask = match.mask->vlan_id; + filter->vlan.pcp.value[0] = match.key->vlan_priority; + filter->vlan.pcp.mask[0] = match.mask->vlan_priority; + match_protocol = false; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; @@ -737,18 +749,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, match_protocol = false; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_match_vlan match; - - flow_rule_match_vlan(rule, &match); - filter->key_type = OCELOT_VCAP_KEY_ANY; - filter->vlan.vid.value = match.key->vlan_id; - filter->vlan.vid.mask = match.mask->vlan_id; - filter->vlan.pcp.value[0] = match.key->vlan_priority; - filter->vlan.pcp.mask[0] = match.mask->vlan_priority; - match_protocol = false; - } - finished_key_parsing: if (match_protocol && proto != ETH_P_ALL) { if (filter->block_id == VCAP_ES0) { From aee099bb0cd4ee96bc9912df2c17e679ad306e5c Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Sun, 8 Jan 2023 18:09:32 +0200 Subject: [PATCH 045/118] net/mlx5e: Update rx ring hw mtu upon each rx-fcs flag change [ Upstream commit 1e66220948df815d7b37e0ff8b4627ce10433738 ] rq->hw_mtu is used in function en_rx.c/mlx5e_skb_from_cqe_mpwrq_linear() to catch oversized packets. If FCS is concatenated to the end of the packet then the check should be updated accordingly. Rx rings initialization (mlx5e_init_rxq_rq()) invoked for every new set of channels, as part of mlx5e_safe_switch_params(), unknowingly if it runs with default configuration or not. Current rq->hw_mtu initialization assumes default configuration and ignores params->scatter_fcs_en flag state. Fix this, by accounting for params->scatter_fcs_en flag state during rq->hw_mtu initialization. In addition, updating rq->hw_mtu value during ingress traffic might lead to packets drop and oversize_pkts_sw_drop counter increase with no good reason. Hence we remove this optimization and switch the set of channels with a new one, to make sure we don't get false positives on the oversize_pkts_sw_drop counter. Fixes: 102722fc6832 ("net/mlx5e: Add support for RXFCS feature flag") Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 86 ++++--------------- 1 file changed, 15 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4dc149ef618c..3e0d910b085d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -591,7 +591,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->ix = c->ix; rq->channel = c; rq->mdev = mdev; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->hw_mtu = + MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; rq->xdpsq = &c->rq_xdpsq; rq->stats = &c->priv->channel_stats[c->ix]->rq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); @@ -1014,35 +1015,6 @@ int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) return mlx5e_rq_to_ready(rq, curr_state); } -static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) -{ - struct mlx5_core_dev *mdev = rq->mdev; - - void *in; - void *rqc; - int inlen; - int err; - - inlen = MLX5_ST_SZ_BYTES(modify_rq_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); - - MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); - MLX5_SET64(modify_rq_in, in, modify_bitmask, - MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); - MLX5_SET(rqc, rqc, scatter_fcs, enable); - MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); - - err = mlx5_core_modify_rq(mdev, rq->rqn, in); - - kvfree(in); - - return err; -} - static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) { struct mlx5_core_dev *mdev = rq->mdev; @@ -3301,20 +3273,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tises(priv); } -static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) -{ - int err = 0; - int i; - - for (i = 0; i < chs->num; i++) { - err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); - if (err) - return err; - } - - return 0; -} - static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { int err; @@ -3890,41 +3848,27 @@ static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) return mlx5_set_ports_check(mdev, in, sizeof(in)); } +static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) +{ + struct mlx5_core_dev *mdev = priv->mdev; + bool enable = *(bool *)ctx; + + return mlx5e_set_rx_port_ts(mdev, enable); +} + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels *chs = &priv->channels; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; int err; mutex_lock(&priv->state_lock); - if (enable) { - err = mlx5e_set_rx_port_ts(mdev, false); - if (err) - goto out; - - chs->params.scatter_fcs_en = true; - err = mlx5e_modify_channels_scatter_fcs(chs, true); - if (err) { - chs->params.scatter_fcs_en = false; - mlx5e_set_rx_port_ts(mdev, true); - } - } else { - chs->params.scatter_fcs_en = false; - err = mlx5e_modify_channels_scatter_fcs(chs, false); - if (err) { - chs->params.scatter_fcs_en = true; - goto out; - } - err = mlx5e_set_rx_port_ts(mdev, true); - if (err) { - mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); - err = 0; - } - } - -out: + new_params = chs->params; + new_params.scatter_fcs_en = enable; + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, + &new_params.scatter_fcs_en, true); mutex_unlock(&priv->state_lock); return err; } From 1e9b6d279db1025dfd8dc425850e7ccc5d3a8547 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 26 Jan 2023 14:47:12 +0100 Subject: [PATCH 046/118] net/mlx5: Bridge, fix ageing of peer FDB entries [ Upstream commit da0c52426cd23f8728eff72c2b2d2a3eb6b451f5 ] SWITCHDEV_FDB_ADD_TO_BRIDGE event handler that updates FDB entry 'lastuse' field is only executed for eswitch that owns the entry. However, if peer entry processed packets at least once it will have hardware counter 'used' value greater than entry 'lastuse' from that point on, which will cause FDB entry not being aged out. Process the event on all eswitch instances. Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ---- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 8099a21e674c..ce85b48d327d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -438,10 +438,6 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: - /* only handle the event on native eswtich of representor */ - if (!mlx5_esw_bridge_is_local(dev, rep, esw)) - break; - fdb_info = container_of(info, struct switchdev_notifier_fdb_info, info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 4fbff7bcc155..d0b2676c3214 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1715,7 +1715,7 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 struct mlx5_esw_bridge *bridge; port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); - if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) + if (!port) return; bridge = port->bridge; From 026eb3e01a3dc67ff8313a0613f1f4f1f5ade428 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Sun, 8 Jan 2023 15:54:46 +0200 Subject: [PATCH 047/118] net/mlx5e: Fix crash unsetting rx-vlan-filter in switchdev mode [ Upstream commit 8974aa9638df557f4642acef707af15648a03555 ] Moving to switchdev mode with rx-vlan-filter on and then setting it off causes the kernel to crash since fs->vlan is freed during nic profile cleanup flow. RX VLAN filtering is not supported in switchdev mode so unset it when changing to switchdev and restore its value when switching back to legacy. trace: [] RIP: 0010:mlx5e_disable_cvlan_filter+0x43/0x70 [] set_feature_cvlan_filter+0x37/0x40 [mlx5_core] [] mlx5e_handle_feature+0x3a/0x60 [mlx5_core] [] mlx5e_set_features+0x6d/0x160 [mlx5_core] [] __netdev_update_features+0x288/0xa70 [] ethnl_set_features+0x309/0x380 [] ? __nla_parse+0x21/0x30 [] genl_family_rcv_msg_doit.isra.17+0x110/0x150 [] genl_rcv_msg+0x112/0x260 [] ? features_reply_size+0xe0/0xe0 [] ? genl_family_rcv_msg_doit.isra.17+0x150/0x150 [] netlink_rcv_skb+0x4e/0x100 [] genl_rcv+0x24/0x40 [] netlink_unicast+0x1ab/0x290 [] netlink_sendmsg+0x257/0x4f0 [] sock_sendmsg+0x5c/0x70 Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors") Signed-off-by: Amir Tzin Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1892ccb889b3..7cd36f4ac3ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -443,7 +443,7 @@ void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) { - if (fs->vlan->cvlan_filter_disabled) + if (!fs->vlan || fs->vlan->cvlan_filter_disabled) return; fs->vlan->cvlan_filter_disabled = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3e0d910b085d..142ed2d98cd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4005,6 +4005,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_GRO_HW) netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); + features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + return features; } From 5ccc3707e97527d815e0fe1129b7f659828754b9 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Jan 2023 16:06:32 +0200 Subject: [PATCH 048/118] net/mlx5e: IPoIB, Show unknown speed instead of error [ Upstream commit 8aa5f171d51c1cb69e5e3106df4dd1a446102823 ] ethtool is returning an error for unknown speeds for the IPoIB interface: $ ethtool ib0 netlink error: failed to retrieve link settings netlink error: Invalid argument netlink error: failed to retrieve link settings netlink error: Invalid argument Settings for ib0: Link detected: no After this change, ethtool will return success and show "unknown speed": $ ethtool ib0 Settings for ib0: Supported ports: [ ] Supported link modes: Not reported Supported pause frame use: No Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: Unknown! Duplex: Full Auto-negotiation: off Port: Other PHYAD: 0 Transceiver: internal Link detected: no Fixes: eb234ee9d541 ("net/mlx5e: IPoIB, Add support for get_link_ksettings in ethtool") Signed-off-by: Dragos Tatulea Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index eff92dc0927c..e09518f887a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -189,16 +189,16 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) } } -static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) { int rate, width; rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); if (rate < 0) - return -EINVAL; + return SPEED_UNKNOWN; width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); if (width < 0) - return -EINVAL; + return SPEED_UNKNOWN; return rate * width; } @@ -221,16 +221,13 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); - if (speed < 0) - return -EINVAL; + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL; - link_ksettings->base.duplex = DUPLEX_FULL; link_ksettings->base.port = PORT_OTHER; link_ksettings->base.autoneg = AUTONEG_DISABLE; - link_ksettings->base.speed = speed; - return 0; } From bbdfebb08ff5eb67ae65539c8cbb728c8f246552 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 22 Jan 2023 21:09:40 +0200 Subject: [PATCH 049/118] net/mlx5: Store page counters in a single array [ Upstream commit c3bdbaea654d8df39112de33037106134a520dc7 ] Currently, an independent page counter is used for tracking memory usage for each function type such as VF, PF and host PF (DPU). For better code-readibilty, use a single array that stores the number of allocated memory pages for each function type. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed Stable-dep-of: 9965bbebae59 ("net/mlx5: Expose SF firmware pages counter") Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/debugfs.c | 4 +- .../net/ethernet/mellanox/mlx5/core/ecpf.c | 2 +- .../ethernet/mellanox/mlx5/core/pagealloc.c | 37 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/sriov.c | 2 +- include/linux/mlx5/driver.h | 12 ++++-- 5 files changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 3e232a65a0c3..c3e7c24a0971 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -245,8 +245,8 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) pages = dev->priv.dbg.pages_debugfs; debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); - debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages); - debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages); + debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 464eb3a18450..cdc87ecae5d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -87,7 +87,7 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_host_pf_cleanup(dev); - err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages); + err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]); if (err) mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 60596357bfc7..9f99292ab5ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -74,6 +74,14 @@ static u32 get_function(u16 func_id, bool ec_function) return (u32)func_id | (ec_function << 16); } +static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) +{ + if (!func_id) + return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; + + return MLX5_VF; +} + static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) { struct rb_root *root; @@ -332,6 +340,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); int notify_fail = event; + u16 func_type; u64 addr; int err; u32 *in; @@ -383,11 +392,9 @@ retry: goto out_dropped; } + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] += npages; dev->priv.fw_pages += npages; - if (func_id) - dev->priv.vfs_pages += npages; - else if (mlx5_core_is_ecpf(dev) && !ec_function) - dev->priv.host_pf_pages += npages; mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", npages, ec_function, func_id, err); @@ -414,6 +421,7 @@ static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, struct rb_root *root; struct rb_node *p; int npages = 0; + u16 func_type; root = xa_load(&dev->priv.page_root_xa, function); if (WARN_ON_ONCE(!root)) @@ -428,11 +436,9 @@ static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, free_fwp(dev, fwp, fwp->free_count); } + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= npages; dev->priv.fw_pages -= npages; - if (func_id) - dev->priv.vfs_pages -= npages; - else if (mlx5_core_is_ecpf(dev) && !ec_function) - dev->priv.host_pf_pages -= npages; mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n", npages, ec_function, func_id); @@ -498,6 +504,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; int num_claimed; + u16 func_type; u32 *out; int err; int i; @@ -549,11 +556,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, if (nclaimed) *nclaimed = num_claimed; + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= num_claimed; dev->priv.fw_pages -= num_claimed; - if (func_id) - dev->priv.vfs_pages -= num_claimed; - else if (mlx5_core_is_ecpf(dev) && !ec_function) - dev->priv.host_pf_pages -= num_claimed; out_free: kvfree(out); @@ -706,12 +711,12 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.fw_pages, "FW pages counter is %d after reclaiming all pages\n", dev->priv.fw_pages); - WARN(dev->priv.vfs_pages, + WARN(dev->priv.page_counters[MLX5_VF], "VFs FW pages counter is %d after reclaiming all pages\n", - dev->priv.vfs_pages); - WARN(dev->priv.host_pf_pages, + dev->priv.page_counters[MLX5_VF]); + WARN(dev->priv.page_counters[MLX5_HOST_PF], "External host PF FW pages counter is %d after reclaiming all pages\n", - dev->priv.host_pf_pages); + dev->priv.page_counters[MLX5_HOST_PF]); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index c0e6c487c63c..3008e9ce2bbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); - if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) + if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ad55470a9fb9..9b300aa4eb95 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -573,6 +573,13 @@ struct mlx5_debugfs_entries { struct dentry *lag_debugfs; }; +enum mlx5_func_type { + MLX5_PF, + MLX5_VF, + MLX5_HOST_PF, + MLX5_FUNC_TYPE_NUM, +}; + struct mlx5_ft_pool; struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ @@ -583,11 +590,10 @@ struct mlx5_priv { struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; struct xarray page_root_xa; - u32 fw_pages; atomic_t reg_pages; struct list_head free_list; - u32 vfs_pages; - u32 host_pf_pages; + u32 fw_pages; + u32 page_counters[MLX5_FUNC_TYPE_NUM]; u32 fw_pages_alloc_failed; u32 give_pages_dropped; u32 reclaim_pages_discard; From ee128b700fd0c6689785196577d00ec968857e8e Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 22 Jan 2023 23:24:56 +0200 Subject: [PATCH 050/118] net/mlx5: Expose SF firmware pages counter [ Upstream commit 9965bbebae59b3563a4d95e4aed121e8965dfdc2 ] Currently, each core device has VF pages counter which stores number of fw pages used by its VFs and SFs. The current design led to a hang when performing firmware reset on DPU, where the DPU PFs stalled in sriov unload flow due to waiting on release of SFs pages instead of waiting on only VFs pages. Thus, Add a separate counter for SF firmware pages, which will prevent the stall scenario described above. Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- include/linux/mlx5/driver.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index c3e7c24a0971..bb95b40d25eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -246,6 +246,7 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 9f99292ab5ce..0eb50be175cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -79,7 +79,7 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct if (!func_id) return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; - return MLX5_VF; + return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; } static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9b300aa4eb95..fff61e6d6d4d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -576,6 +576,7 @@ struct mlx5_debugfs_entries { enum mlx5_func_type { MLX5_PF, MLX5_VF, + MLX5_SF, MLX5_HOST_PF, MLX5_FUNC_TYPE_NUM, }; From 525e29974e3ce3e5f61c66ee8f3eb97a0eccf892 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Jan 2023 15:27:40 +0200 Subject: [PATCH 051/118] net/mlx5: fw_tracer, Clear load bit when freeing string DBs buffers [ Upstream commit db561fed6b8fa3878e74d5df6512a4a38152b63e ] Whenever the driver is reading the string DBs into buffers, the driver is setting the load bit, but the driver never clears this bit. As a result, in case load bit is on and the driver query the device for new string DBs, the driver won't read again the string DBs. Fix it by clearing the load bit when query the device for new string DBs. Fixes: 2d69356752ff ("net/mlx5: Add support for fw live patch event") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 21831386b26e..d82e98a0cdfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -64,6 +64,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) MLX5_GET(mtrc_cap, out, num_string_trace); tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db); tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + tracer->str_db.loaded = false; for (i = 0; i < tracer->str_db.num_string_db; i++) { mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]); From b266f3158171b128b43b6ad32b32175edb05fc90 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 25 Jan 2023 17:39:36 +0200 Subject: [PATCH 052/118] net/mlx5: fw_tracer, Zero consumer index when reloading the tracer [ Upstream commit 184e1e4474dbcfebc4dbd1fa823a329978f25506 ] When tracer is reloaded, the device will log the traces at the beginning of the log buffer. Also, driver is reading the log buffer in chunks in accordance to the consumer index. Hence, zero consumer index when reloading the tracer. Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index d82e98a0cdfa..5b05b884b5fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -757,6 +757,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) if (err) mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err); + tracer->buff.consumer_index = 0; return err; } @@ -821,7 +822,6 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); if (tracer->owner) { tracer->owner = false; - tracer->buff.consumer_index = 0; return; } From 190296d51b4690e6175f25230ea53c8b2f7cdf64 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 14 Dec 2022 22:16:23 +0200 Subject: [PATCH 053/118] net/mlx5: Serialize module cleanup with reload and remove [ Upstream commit 8f0d1451ecf7b3bd5a06ffc866c753d0f3ab4683 ] Currently, remove and reload flows can run in parallel to module cleanup. This design is error prone. For example: aux_drivers callbacks are called from both cleanup and remove flows with different lockings, which can cause a deadlock[1]. Hence, serialize module cleanup with reload and remove. [1] cleanup remove ------- ------ auxiliary_driver_unregister(); devl_lock() auxiliary_device_delete(mlx5e_aux) device_lock(mlx5e_aux) devl_lock() device_lock(mlx5e_aux) Fixes: 912cebf420c2 ("net/mlx5e: Connect ethernet part to auxiliary bus") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d4db1adae3e3..f07175549a87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2094,7 +2094,7 @@ static int __init mlx5_init(void) mlx5_core_verify_params(); mlx5_register_debugfs(); - err = pci_register_driver(&mlx5_core_driver); + err = mlx5e_init(); if (err) goto err_debug; @@ -2102,16 +2102,16 @@ static int __init mlx5_init(void) if (err) goto err_sf; - err = mlx5e_init(); + err = pci_register_driver(&mlx5_core_driver); if (err) - goto err_en; + goto err_pci; return 0; -err_en: +err_pci: mlx5_sf_driver_unregister(); err_sf: - pci_unregister_driver(&mlx5_core_driver); + mlx5e_cleanup(); err_debug: mlx5_unregister_debugfs(); return err; @@ -2119,9 +2119,9 @@ err_debug: static void __exit mlx5_cleanup(void) { - mlx5e_cleanup(); - mlx5_sf_driver_unregister(); pci_unregister_driver(&mlx5_core_driver); + mlx5_sf_driver_unregister(); + mlx5e_cleanup(); mlx5_unregister_debugfs(); } From 5f9c656ab2c4c36f3b85819c7a9a8bec5711cfb5 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 6 Feb 2023 15:58:18 -0800 Subject: [PATCH 054/118] igc: Add ndo_tx_timeout support [ Upstream commit 9b275176270efd18f2f4e328b32be1bad34c4c0d ] On some platforms, 100/1000/2500 speeds seem to have sometimes problems reporting false positive tx unit hang during stressful UDP traffic. Likely other Intel drivers introduce responses to a tx hang. Update the 'tx hang' comparator with the comparison of the head and tail of ring pointers and restore the tx_timeout_factor to the previous value (one). This can be test by using netperf or iperf3 applications. Example: iperf3 -s -p 5001 iperf3 -c 192.168.0.2 --udp -p 5001 --time 600 -b 0 netserver -p 16604 netperf -H 192.168.0.2 -l 600 -p 16604 -t UDP_STREAM -- -m 64000 Fixes: b27b8dc77b5e ("igc: Increase timeout value for Speed 100/1000/2500") Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230206235818.662384-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 25 +++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 34db1c006b20..3b5b36206c44 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2942,7 +2942,9 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && - !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { + !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && + (rd32(IGC_TDH(tx_ring->reg_idx)) != + readl(tx_ring->tail))) { /* detected Tx unit hang */ netdev_err(tx_ring->netdev, "Detected Tx Unit Hang\n" @@ -5068,6 +5070,24 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) return 0; } +/** + * igc_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + * @txqueue: queue number that timed out + **/ +static void igc_tx_timeout(struct net_device *netdev, + unsigned int __always_unused txqueue) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + + /* Do the reset outside of interrupt context */ + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); + wr32(IGC_EICS, + (adapter->eims_enable_mask & ~adapter->eims_other)); +} + /** * igc_get_stats64 - Get System Network Statistics * @netdev: network interface device structure @@ -5495,7 +5515,7 @@ static void igc_watchdog_task(struct work_struct *work) case SPEED_100: case SPEED_1000: case SPEED_2500: - adapter->tx_timeout_factor = 7; + adapter->tx_timeout_factor = 1; break; } @@ -6313,6 +6333,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_set_rx_mode = igc_set_rx_mode, .ndo_set_mac_address = igc_set_mac, .ndo_change_mtu = igc_change_mtu, + .ndo_tx_timeout = igc_tx_timeout, .ndo_get_stats64 = igc_get_stats64, .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, From 44a265af4aa926bc60e45f7f6dd60c201aef6d62 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 6 Feb 2023 22:47:03 +0200 Subject: [PATCH 055/118] net: ethernet: mtk_eth_soc: fix wrong parameters order in __xdp_rxq_info_reg() [ Upstream commit c966153d120222cd4e85e1e1601584d7d4d91dcb ] Parameters 'queue_index' and 'napi_id' are passed in a swapped order. Fix it here. Fixes: 23233e577ef9 ("net: ethernet: mtk_eth_soc: rely on page_pool for single page buffers") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 9aa1892a609c..53ee9dea6638 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1495,8 +1495,8 @@ static struct page_pool *mtk_create_page_pool(struct mtk_eth *eth, if (IS_ERR(pp)) return pp; - err = __xdp_rxq_info_reg(xdp_q, ð->dummy_dev, eth->rx_napi.napi_id, - id, PAGE_SIZE); + err = __xdp_rxq_info_reg(xdp_q, ð->dummy_dev, id, + eth->rx_napi.napi_id, PAGE_SIZE); if (err < 0) goto err_free_pp; From 0ae9d81109e9bf3d0151df1f60632efb98ac497f Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Tue, 7 Feb 2023 02:08:20 +0000 Subject: [PATCH 056/118] txhash: fix sk->sk_txrehash default [ Upstream commit c11204c78d6966c5bda6dd05c3ac5cbb193f93e3 ] This code fix a bug that sk->sk_txrehash gets its default enable value from sysctl_txrehash only when the socket is a TCP listener. We should have sysctl_txrehash to set the default sk->sk_txrehash, no matter TCP, nor listerner/connector. Tested by following packetdrill: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 socket(..., SOCK_DGRAM, IPPROTO_UDP) = 4 // SO_TXREHASH == 74, default to sysctl_txrehash == 1 +0 getsockopt(3, SOL_SOCKET, 74, [1], [4]) = 0 +0 getsockopt(4, SOL_SOCKET, 74, [1], [4]) = 0 Fixes: 26859240e4ee ("txhash: Add socket option to control TX hash rethink behavior") Signed-off-by: Kevin Yang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 3 ++- net/ipv4/af_inet.c | 1 + net/ipv4/inet_connection_sock.c | 3 --- net/ipv6/af_inet6.c | 1 + 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 30407b2dd2ac..ba6ea61b3458 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1524,6 +1524,8 @@ set_sndbuf: ret = -EINVAL; break; } + if ((u8)val == SOCK_TXREHASH_DEFAULT) + val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); /* Paired with READ_ONCE() in tcp_rtx_synack() */ WRITE_ONCE(sk->sk_txrehash, (u8)val); break; @@ -3428,7 +3430,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_pacing_rate = ~0UL; WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; - sk->sk_txrehash = SOCK_TXREHASH_DEFAULT; sk_rx_queue_clear(sk); /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92d423786251..5b19b77d5d75 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -347,6 +347,7 @@ lookup_protocol: sk->sk_destruct = inet_sock_destruct; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); inet->uc_ttl = -1; inet->mc_loop = 1; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 647b3c6b575e..7152ede18f11 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1225,9 +1225,6 @@ int inet_csk_listen_start(struct sock *sk) sk->sk_ack_backlog = 0; inet_csk_delack_init(sk); - if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT) - sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); - /* There is race window here: we announce ourselves listening, * but this transition is still not validated by get_port(). * It is OK, because this socket enters to hash table only diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7b0cd54da452..fb1bf6eb0ff8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -221,6 +221,7 @@ lookup_protocol: np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; + sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. From da32c2d9f64b959145e86f9f2afdf974ea351be3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 7 Feb 2023 16:18:19 +0200 Subject: [PATCH 057/118] selftests: Fix failing VXLAN VNI filtering test [ Upstream commit b963d9d5b9437a6b99504987310f98537c9e77d4 ] iproute2 does not recognize the "group6" and "remote6" keywords. Fix by using "group" and "remote" instead. Before: # ./test_vxlan_vnifiltering.sh [...] Tests passed: 25 Tests failed: 2 After: # ./test_vxlan_vnifiltering.sh [...] Tests passed: 27 Tests failed: 0 Fixes: 3edf5f66c12a ("selftests: add new tests for vxlan vnifiltering") Signed-off-by: Ido Schimmel Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230207141819.256689-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../selftests/net/test_vxlan_vnifiltering.sh | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 704997ffc244..8c3ac0a72545 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -293,19 +293,11 @@ setup-vm() { elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then # Add per vni group config with 'bridge vni' api if [ -n "$group" ]; then - if [ "$family" == "v4" ]; then - if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group - else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group - fi - else - if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group - else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group - fi - fi + if [ $mcast -eq 1 ]; then + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + else + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + fi fi fi done From 1d52bbfd469af69fbcae88c67f160ce1b968e7f3 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Tue, 7 Feb 2023 18:26:34 +0000 Subject: [PATCH 058/118] rds: rds_rm_zerocopy_callback() use list_first_entry() [ Upstream commit f753a68980cf4b59a80fe677619da2b1804f526d ] rds_rm_zerocopy_callback() uses list_entry() on the head of a list causing a type confusion. Use list_first_entry() to actually access the first element of the rs_zcookie_queue list. Fixes: 9426bbc6de99 ("rds: use list structure to track information for zerocopy completion notification") Reviewed-by: Willem de Bruijn Signed-off-by: Pietro Borrello Link: https://lore.kernel.org/r/20230202-rds-zerocopy-v3-1-83b0df974f9a@diag.uniroma1.it Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/rds/message.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 44dbc612ef54..9402bc941823 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, spin_lock_irqsave(&q->lock, flags); head = &q->zcookie_head; if (!list_empty(head)) { - info = list_entry(head, struct rds_msg_zcopy_info, - rs_zcookie_next); - if (info && rds_zcookie_add(info, cookie)) { + info = list_first_entry(head, struct rds_msg_zcopy_info, + rs_zcookie_next); + if (rds_zcookie_add(info, cookie)) { spin_unlock_irqrestore(&q->lock, flags); kfree(rds_info_from_znotifier(znotif)); /* caller invokes rds_wake_sk_sleep() */ From 399f1cccc447546e77967cc957414bb1dd0f7b4e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Feb 2023 20:31:17 +0200 Subject: [PATCH 059/118] net: mscc: ocelot: fix all IPv6 getting trapped to CPU when PTP timestamping is used [ Upstream commit 2fcde9fe258ec8b88d41def38e43ca4da32c0a9a ] While running this selftest which usually passes: ~/selftests/drivers/net/dsa# ./local_termination.sh eno0 swp0 TEST: swp0: Unicast IPv4 to primary MAC address [ OK ] TEST: swp0: Unicast IPv4 to macvlan MAC address [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address, promisc [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address, allmulti [ OK ] TEST: swp0: Multicast IPv4 to joined group [ OK ] TEST: swp0: Multicast IPv4 to unknown group [ OK ] TEST: swp0: Multicast IPv4 to unknown group, promisc [ OK ] TEST: swp0: Multicast IPv4 to unknown group, allmulti [ OK ] TEST: swp0: Multicast IPv6 to joined group [ OK ] TEST: swp0: Multicast IPv6 to unknown group [ OK ] TEST: swp0: Multicast IPv6 to unknown group, promisc [ OK ] TEST: swp0: Multicast IPv6 to unknown group, allmulti [ OK ] if I start PTP timestamping then run it again (debug prints added by me), the unknown IPv6 MC traffic is seen by the CPU port even when it should have been dropped: ~/selftests/drivers/net/dsa# ptp4l -i swp0 -2 -P -m ptp4l[225.410]: selected /dev/ptp1 as PTP clock [ 225.445746] mscc_felix 0000:00:00.5: ocelot_l2_ptp_trap_add: port 0 adding L2 PTP trap [ 225.453815] mscc_felix 0000:00:00.5: ocelot_ipv4_ptp_trap_add: port 0 adding IPv4 PTP event trap [ 225.462703] mscc_felix 0000:00:00.5: ocelot_ipv4_ptp_trap_add: port 0 adding IPv4 PTP general trap [ 225.471768] mscc_felix 0000:00:00.5: ocelot_ipv6_ptp_trap_add: port 0 adding IPv6 PTP event trap [ 225.480651] mscc_felix 0000:00:00.5: ocelot_ipv6_ptp_trap_add: port 0 adding IPv6 PTP general trap ptp4l[225.488]: port 1: INITIALIZING to LISTENING on INIT_COMPLETE ptp4l[225.488]: port 0: INITIALIZING to LISTENING on INIT_COMPLETE ^C ~/selftests/drivers/net/dsa# ./local_termination.sh eno0 swp0 TEST: swp0: Unicast IPv4 to primary MAC address [ OK ] TEST: swp0: Unicast IPv4 to macvlan MAC address [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address, promisc [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address, allmulti [ OK ] TEST: swp0: Multicast IPv4 to joined group [ OK ] TEST: swp0: Multicast IPv4 to unknown group [ OK ] TEST: swp0: Multicast IPv4 to unknown group, promisc [ OK ] TEST: swp0: Multicast IPv4 to unknown group, allmulti [ OK ] TEST: swp0: Multicast IPv6 to joined group [ OK ] TEST: swp0: Multicast IPv6 to unknown group [FAIL] reception succeeded, but should have failed TEST: swp0: Multicast IPv6 to unknown group, promisc [ OK ] TEST: swp0: Multicast IPv6 to unknown group, allmulti [ OK ] The PGID_MCIPV6 is configured correctly to not flood to the CPU, I checked that. Furthermore, when I disable back PTP RX timestamping (ptp4l doesn't do that when it exists), packets are RX filtered again as they should be: ~/selftests/drivers/net/dsa# hwstamp_ctl -i swp0 -r 0 [ 218.202854] mscc_felix 0000:00:00.5: ocelot_l2_ptp_trap_del: port 0 removing L2 PTP trap [ 218.212656] mscc_felix 0000:00:00.5: ocelot_ipv4_ptp_trap_del: port 0 removing IPv4 PTP event trap [ 218.222975] mscc_felix 0000:00:00.5: ocelot_ipv4_ptp_trap_del: port 0 removing IPv4 PTP general trap [ 218.233133] mscc_felix 0000:00:00.5: ocelot_ipv6_ptp_trap_del: port 0 removing IPv6 PTP event trap [ 218.242251] mscc_felix 0000:00:00.5: ocelot_ipv6_ptp_trap_del: port 0 removing IPv6 PTP general trap current settings: tx_type 1 rx_filter 12 new settings: tx_type 1 rx_filter 0 ~/selftests/drivers/net/dsa# ./local_termination.sh eno0 swp0 TEST: swp0: Unicast IPv4 to primary MAC address [ OK ] TEST: swp0: Unicast IPv4 to macvlan MAC address [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address, promisc [ OK ] TEST: swp0: Unicast IPv4 to unknown MAC address, allmulti [ OK ] TEST: swp0: Multicast IPv4 to joined group [ OK ] TEST: swp0: Multicast IPv4 to unknown group [ OK ] TEST: swp0: Multicast IPv4 to unknown group, promisc [ OK ] TEST: swp0: Multicast IPv4 to unknown group, allmulti [ OK ] TEST: swp0: Multicast IPv6 to joined group [ OK ] TEST: swp0: Multicast IPv6 to unknown group [ OK ] TEST: swp0: Multicast IPv6 to unknown group, promisc [ OK ] TEST: swp0: Multicast IPv6 to unknown group, allmulti [ OK ] So it's clear that something in the PTP RX trapping logic went wrong. Looking a bit at the code, I can see that there are 4 typos, which populate "ipv4" VCAP IS2 key filter fields for IPv6 keys. VCAP IS2 keys of type OCELOT_VCAP_KEY_IPV4 and OCELOT_VCAP_KEY_IPV6 are handled by is2_entry_set(). OCELOT_VCAP_KEY_IPV4 looks at &filter->key.ipv4, and OCELOT_VCAP_KEY_IPV6 at &filter->key.ipv6. Simply put, when we populate the wrong key field, &filter->key.ipv6 fields "proto.mask" and "proto.value" remain all zeroes (or "don't care"). So is2_entry_set() will enter the "else" of this "if" condition: if (msk == 0xff && (val == IPPROTO_TCP || val == IPPROTO_UDP)) and proceed to ignore the "proto" field. The resulting rule will match on all IPv6 traffic, trapping it to the CPU. This is the reason why the local_termination.sh selftest sees it, because control traps are stronger than the PGID_MCIPV6 used for flooding (from the forwarding data path). But the problem is in fact much deeper. We trap all IPv6 traffic to the CPU, but if we're bridged, we set skb->offload_fwd_mark = 1, so software forwarding will not take place and IPv6 traffic will never reach its destination. The fix is simple - correct the typos. I was intentionally inaccurate in the commit message about the breakage occurring when any PTP timestamping is enabled. In fact it only happens when L4 timestamping is requested (HWTSTAMP_FILTER_PTP_V2_EVENT or HWTSTAMP_FILTER_PTP_V2_L4_EVENT). But ptp4l requests a larger RX timestamping filter than it needs for "-2": HWTSTAMP_FILTER_PTP_V2_EVENT. I wanted people skimming through git logs to not think that the bug doesn't affect them because they only use ptp4l in L2 mode. Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230207183117.1745754-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot_ptp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 1a82f10c8853..2180ae94c744 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -335,8 +335,8 @@ static void ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap) { trap->key_type = OCELOT_VCAP_KEY_IPV6; - trap->key.ipv4.proto.value[0] = IPPROTO_UDP; - trap->key.ipv4.proto.mask[0] = 0xff; + trap->key.ipv6.proto.value[0] = IPPROTO_UDP; + trap->key.ipv6.proto.mask[0] = 0xff; trap->key.ipv6.dport.value = PTP_EV_PORT; trap->key.ipv6.dport.mask = 0xffff; } @@ -355,8 +355,8 @@ static void ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap) { trap->key_type = OCELOT_VCAP_KEY_IPV6; - trap->key.ipv4.proto.value[0] = IPPROTO_UDP; - trap->key.ipv4.proto.mask[0] = 0xff; + trap->key.ipv6.proto.value[0] = IPPROTO_UDP; + trap->key.ipv6.proto.mask[0] = 0xff; trap->key.ipv6.dport.value = PTP_GEN_PORT; trap->key.ipv6.dport.mask = 0xffff; } From 81685c59dc4b8a790955b560fe47eec4f4f2d694 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Feb 2023 11:21:10 +0800 Subject: [PATCH 060/118] selftests: forwarding: lib: quote the sysctl values [ Upstream commit 3a082086aa200852545cf15159213582c0c80eba ] When set/restore sysctl value, we should quote the value as some keys may have multi values, e.g. net.ipv4.ping_group_range Fixes: f5ae57784ba8 ("selftests: forwarding: lib: Add sysctl_set(), sysctl_restore()") Signed-off-by: Hangbin Liu Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20230208032110.879205-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/lib.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 3ffb9d6c0950..f4721f1b2886 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -906,14 +906,14 @@ sysctl_set() local value=$1; shift SYSCTL_ORIG[$key]=$(sysctl -n $key) - sysctl -qw $key=$value + sysctl -qw $key="$value" } sysctl_restore() { local key=$1; shift - sysctl -qw $key=${SYSCTL_ORIG["$key"]} + sysctl -qw $key="${SYSCTL_ORIG[$key]}" } forwarding_enable() From 10150d35091778beb5840b6a6ecf371e127f4e69 Mon Sep 17 00:00:00 2001 From: Arnaud Ferraris Date: Thu, 15 Dec 2022 11:19:47 +0100 Subject: [PATCH 061/118] arm64: dts: rockchip: fix input enable pinconf on rk3399 [ Upstream commit 6f515b663d49a14fb63f8c5d0a2a4ae53d44790a ] When the input enable pinconf was introduced, a default drive-strength value of 2 was set for the pull up/down configs. However, this parameter is unneeded when configuring the pin as input, and having a single hardcoded value here is actually harmful: GPIOs on the RK3399 have various same drive-strength capabilities depending on the bank and port they belong to. As an example, trying to configure the GPIO4_PD3 pin as an input with pull-up enabled fails with the following output: [ 10.706542] rockchip-pinctrl pinctrl: unsupported driver strength 2 [ 10.713661] rockchip-pinctrl pinctrl: pin_config_set op failed for pin 155 (acceptable drive-strength values for this pin being 3, 6, 9 and 12) Let's drop the drive-strength property from all input pinconfs in order to solve this issue. Fixes: ec48c3e82ca3 ("arm64: dts: rockchip: add an input enable pinconf to rk3399") Signed-off-by: Arnaud Ferraris Reviewed-by: Caleb Connolly Link: https://lore.kernel.org/r/20221215101947.254896-1-arnaud.ferraris@collabora.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 92c2207e686c..59858f2dc8b9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -2221,13 +2221,11 @@ pcfg_input_pull_up: pcfg-input-pull-up { input-enable; bias-pull-up; - drive-strength = <2>; }; pcfg_input_pull_down: pcfg-input-pull-down { input-enable; bias-pull-down; - drive-strength = <2>; }; clock { From 65473ade50628fd4c798482eaca07d499aae3841 Mon Sep 17 00:00:00 2001 From: Dan Johansen Date: Sat, 28 Jan 2023 12:24:32 +0100 Subject: [PATCH 062/118] arm64: dts: rockchip: set sdmmc0 speed to sd-uhs-sdr50 on rock-3a [ Upstream commit bc121b707e816616567683e51fd9194c2309977a ] As other rk336x based devices, the Rock 3 Model A has issues with high speed SD cards, so lower the speed to 50 instead of 104 in the same manor has the Quartz64 Model B has. Fixes: 22a442e6586c ("arm64: dts: rockchip: add basic dts for the radxa rock3 model a") Signed-off-by: Dan Johansen Link: https://lore.kernel.org/r/20230128112432.132302-1-strit@manjaro.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index 539ef8cc7792..44313a18e484 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -642,7 +642,7 @@ disable-wp; pinctrl-names = "default"; pinctrl-0 = <&sdmmc0_bus4 &sdmmc0_clk &sdmmc0_cmd &sdmmc0_det>; - sd-uhs-sdr104; + sd-uhs-sdr50; vmmc-supply = <&vcc3v3_sd>; vqmmc-supply = <&vccio_sd>; status = "okay"; From 885f729b249f371cd9a3eef3c27d6a33482f5a6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Jan 2023 13:02:13 +0300 Subject: [PATCH 063/118] ALSA: pci: lx6464es: fix a debug loop [ Upstream commit 5dac9f8dc25fefd9d928b98f6477ff3daefd73e3 ] This loop accidentally reuses the "i" iterator for both the inside and the outside loop. The value of MAX_STREAM_BUFFER is 5. I believe that chip->rmh.stat_len is in the 2-12 range. If the value of .stat_len is 4 or more then it will loop exactly one time, but if it's less then it is a forever loop. It looks like it was supposed to combined into one loop where conditions are checked. Fixes: 8e6320064c33 ("ALSA: lx_core: Remove useless #if 0 .. #endif") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y9jnJTis/mRFJAQp@kili Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/lx6464es/lx_core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c index d3f58a3d17fb..b5b0d43bb8dc 100644 --- a/sound/pci/lx6464es/lx_core.c +++ b/sound/pci/lx6464es/lx_core.c @@ -493,12 +493,11 @@ int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, dev_dbg(chip->card->dev, "CMD_08_ASK_BUFFERS: needed %d, freed %d\n", *r_needed, *r_freed); - for (i = 0; i < MAX_STREAM_BUFFER; ++i) { - for (i = 0; i != chip->rmh.stat_len; ++i) - dev_dbg(chip->card->dev, - " stat[%d]: %x, %x\n", i, - chip->rmh.stat[i], - chip->rmh.stat[i] & MASK_DATA_SIZE); + for (i = 0; i < MAX_STREAM_BUFFER && i < chip->rmh.stat_len; + ++i) { + dev_dbg(chip->card->dev, " stat[%d]: %x, %x\n", i, + chip->rmh.stat[i], + chip->rmh.stat[i] & MASK_DATA_SIZE); } } From 12773070b09b8be2a396881a09f377bc2660cfa2 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 7 Dec 2022 10:50:38 +0800 Subject: [PATCH 064/118] riscv: stacktrace: Fix missing the first frame [ Upstream commit cb80242cc679d6397e77d8a964deeb3ff218d2b5 ] When running kfence_test, I found some testcases failed like this: # test_out_of_bounds_read: EXPECTATION FAILED at mm/kfence/kfence_test.c:346 Expected report_matches(&expect) to be true, but is false not ok 1 - test_out_of_bounds_read The corresponding call-trace is: BUG: KFENCE: out-of-bounds read in kunit_try_run_case+0x38/0x84 Out-of-bounds read at 0x(____ptrval____) (32B right of kfence-#10): kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc The kfence_test using the first frame of call trace to check whether the testcase is succeed or not. Commit 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") skip first frame for all case, which results the kfence_test failed. Indeed, we only need to skip the first frame for case (task==NULL || task==current). With this patch, the call-trace will be: BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0x88/0x19e Out-of-bounds read at 0x(____ptrval____) (1B left of kfence-#7): test_out_of_bounds_read+0x88/0x19e kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc Fixes: 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") Signed-off-by: Liu Shixin Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20221207025038.1022045-1-liushixin2@huawei.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/stacktrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index bcfe9eb55f80..85cd5442d2f8 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -30,6 +30,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = (unsigned long)__builtin_frame_address(0); sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; + level = -1; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -41,7 +42,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) + if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; /* Validate frame pointer */ From 0f3d5c12b612b0220f8d90982207bf1996e0f493 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 2 Feb 2023 18:40:14 +0800 Subject: [PATCH 065/118] arm64: dts: mediatek: mt8195: Fix vdosys* compatible strings [ Upstream commit 97801cfcf9565247bcc53b67ea47fa87b1704375 ] When vdosys1 was initially added, it was incorrectly assumed to be compatible with vdosys0, and thus both had the same mt8195-mmsys compatible attached. This has since been corrected in commit b237efd47df7 ("dt-bindings: arm: mediatek: mmsys: change compatible for MT8195") and commit 82219cfbef18 ("dt-bindings: arm: mediatek: mmsys: add vdosys1 compatible for MT8195"). The device tree needs to be fixed as well, otherwise the vdosys1 block fails to work, and causes its dependent power domain controller to not work either. Change the compatible string of vdosys1 to "mediatek,mt8195-vdosys1". While at it, also add the new "mediatek,mt8195-vdosys0" compatible to vdosys0. Fixes: 6aa5b46d1755 ("arm64: dts: mt8195: Add vdosys and vppsys clock nodes") Signed-off-by: Chen-Yu Tsai Tested-by: AngeloGioacchino Del Regno Reviewed-by: AngeloGioacchino Del Regno Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20230202104014.2931517-1-wenst@chromium.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 0b85b5874a4f..6f5fa7ca4901 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -1966,7 +1966,7 @@ }; vdosys0: syscon@1c01a000 { - compatible = "mediatek,mt8195-mmsys", "syscon"; + compatible = "mediatek,mt8195-vdosys0", "mediatek,mt8195-mmsys", "syscon"; reg = <0 0x1c01a000 0 0x1000>; mboxes = <&gce0 0 CMDQ_THR_PRIO_4>; #clock-cells = <1>; @@ -2101,7 +2101,7 @@ }; vdosys1: syscon@1c100000 { - compatible = "mediatek,mt8195-mmsys", "syscon"; + compatible = "mediatek,mt8195-vdosys1", "syscon"; reg = <0 0x1c100000 0 0x1000>; #clock-cells = <1>; }; From 3e984873e4ac2bcec2749a3d92506ba2a3f0ac95 Mon Sep 17 00:00:00 2001 From: Daniel Beer Date: Thu, 27 Oct 2022 21:28:31 +1300 Subject: [PATCH 066/118] ASoC: tas5805m: rework to avoid scheduling while atomic. [ Upstream commit 147323792693bf013f60dca160be1d32bd4d180a ] There's some setup we need to do in order to get the DSP initialized, and this can't be done until a bit-clock is ready. In an earlier version of this driver, this work was done in a DAPM callback. The DAPM callback doesn't guarantee that the bit-clock is running, so the work was moved instead to the trigger callback. Unfortunately this callback runs in atomic context, and the setup code needs to do I2C transactions. Here we use a work_struct to kick off the setup in a thread instead. Fixes: ec45268467f4 ("ASoC: add support for TAS5805M digital amplifier") Signed-off-by: Daniel Beer Link: https://lore.kernel.org/r/85d8ba405cb009a7a3249b556dc8f3bdb1754fdf.1675497326.git.daniel.beer@igorinstitute.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas5805m.c | 132 ++++++++++++++++++++++++------------ 1 file changed, 89 insertions(+), 43 deletions(-) diff --git a/sound/soc/codecs/tas5805m.c b/sound/soc/codecs/tas5805m.c index beb4ec629a03..6e2edf045446 100644 --- a/sound/soc/codecs/tas5805m.c +++ b/sound/soc/codecs/tas5805m.c @@ -154,6 +154,7 @@ static const uint32_t tas5805m_volume[] = { #define TAS5805M_VOLUME_MIN 0 struct tas5805m_priv { + struct i2c_client *i2c; struct regulator *pvdd; struct gpio_desc *gpio_pdn_n; @@ -165,6 +166,9 @@ struct tas5805m_priv { int vol[2]; bool is_powered; bool is_muted; + + struct work_struct work; + struct mutex lock; }; static void set_dsp_scale(struct regmap *rm, int offset, int vol) @@ -181,13 +185,11 @@ static void set_dsp_scale(struct regmap *rm, int offset, int vol) regmap_bulk_write(rm, offset, v, ARRAY_SIZE(v)); } -static void tas5805m_refresh(struct snd_soc_component *component) +static void tas5805m_refresh(struct tas5805m_priv *tas5805m) { - struct tas5805m_priv *tas5805m = - snd_soc_component_get_drvdata(component); struct regmap *rm = tas5805m->regmap; - dev_dbg(component->dev, "refresh: is_muted=%d, vol=%d/%d\n", + dev_dbg(&tas5805m->i2c->dev, "refresh: is_muted=%d, vol=%d/%d\n", tas5805m->is_muted, tas5805m->vol[0], tas5805m->vol[1]); regmap_write(rm, REG_PAGE, 0x00); @@ -226,8 +228,11 @@ static int tas5805m_vol_get(struct snd_kcontrol *kcontrol, struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); + mutex_lock(&tas5805m->lock); ucontrol->value.integer.value[0] = tas5805m->vol[0]; ucontrol->value.integer.value[1] = tas5805m->vol[1]; + mutex_unlock(&tas5805m->lock); + return 0; } @@ -243,11 +248,13 @@ static int tas5805m_vol_put(struct snd_kcontrol *kcontrol, snd_soc_kcontrol_component(kcontrol); struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); + int ret = 0; if (!(volume_is_valid(ucontrol->value.integer.value[0]) && volume_is_valid(ucontrol->value.integer.value[1]))) return -EINVAL; + mutex_lock(&tas5805m->lock); if (tas5805m->vol[0] != ucontrol->value.integer.value[0] || tas5805m->vol[1] != ucontrol->value.integer.value[1]) { tas5805m->vol[0] = ucontrol->value.integer.value[0]; @@ -256,11 +263,12 @@ static int tas5805m_vol_put(struct snd_kcontrol *kcontrol, tas5805m->vol[0], tas5805m->vol[1], tas5805m->is_powered); if (tas5805m->is_powered) - tas5805m_refresh(component); - return 1; + tas5805m_refresh(tas5805m); + ret = 1; } + mutex_unlock(&tas5805m->lock); - return 0; + return ret; } static const struct snd_kcontrol_new tas5805m_snd_controls[] = { @@ -294,50 +302,18 @@ static int tas5805m_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_component *component = dai->component; struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); - struct regmap *rm = tas5805m->regmap; - unsigned int chan, global1, global2; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - dev_dbg(component->dev, "DSP startup\n"); - - /* We mustn't issue any I2C transactions until the I2S - * clock is stable. Furthermore, we must allow a 5ms - * delay after the first set of register writes to - * allow the DSP to boot before configuring it. - */ - usleep_range(5000, 10000); - send_cfg(rm, dsp_cfg_preboot, - ARRAY_SIZE(dsp_cfg_preboot)); - usleep_range(5000, 15000); - send_cfg(rm, tas5805m->dsp_cfg_data, - tas5805m->dsp_cfg_len); - - tas5805m->is_powered = true; - tas5805m_refresh(component); + dev_dbg(component->dev, "clock start\n"); + schedule_work(&tas5805m->work); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - dev_dbg(component->dev, "DSP shutdown\n"); - - tas5805m->is_powered = false; - - regmap_write(rm, REG_PAGE, 0x00); - regmap_write(rm, REG_BOOK, 0x00); - - regmap_read(rm, REG_CHAN_FAULT, &chan); - regmap_read(rm, REG_GLOBAL_FAULT1, &global1); - regmap_read(rm, REG_GLOBAL_FAULT2, &global2); - - dev_dbg(component->dev, - "fault regs: CHAN=%02x, GLOBAL1=%02x, GLOBAL2=%02x\n", - chan, global1, global2); - - regmap_write(rm, REG_DEVICE_CTRL_2, DCTRL2_MODE_HIZ); break; default: @@ -347,6 +323,67 @@ static int tas5805m_trigger(struct snd_pcm_substream *substream, int cmd, return 0; } +static void do_work(struct work_struct *work) +{ + struct tas5805m_priv *tas5805m = + container_of(work, struct tas5805m_priv, work); + struct regmap *rm = tas5805m->regmap; + + dev_dbg(&tas5805m->i2c->dev, "DSP startup\n"); + + mutex_lock(&tas5805m->lock); + /* We mustn't issue any I2C transactions until the I2S + * clock is stable. Furthermore, we must allow a 5ms + * delay after the first set of register writes to + * allow the DSP to boot before configuring it. + */ + usleep_range(5000, 10000); + send_cfg(rm, dsp_cfg_preboot, ARRAY_SIZE(dsp_cfg_preboot)); + usleep_range(5000, 15000); + send_cfg(rm, tas5805m->dsp_cfg_data, tas5805m->dsp_cfg_len); + + tas5805m->is_powered = true; + tas5805m_refresh(tas5805m); + mutex_unlock(&tas5805m->lock); +} + +static int tas5805m_dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct tas5805m_priv *tas5805m = + snd_soc_component_get_drvdata(component); + struct regmap *rm = tas5805m->regmap; + + if (event & SND_SOC_DAPM_PRE_PMD) { + unsigned int chan, global1, global2; + + dev_dbg(component->dev, "DSP shutdown\n"); + cancel_work_sync(&tas5805m->work); + + mutex_lock(&tas5805m->lock); + if (tas5805m->is_powered) { + tas5805m->is_powered = false; + + regmap_write(rm, REG_PAGE, 0x00); + regmap_write(rm, REG_BOOK, 0x00); + + regmap_read(rm, REG_CHAN_FAULT, &chan); + regmap_read(rm, REG_GLOBAL_FAULT1, &global1); + regmap_read(rm, REG_GLOBAL_FAULT2, &global2); + + dev_dbg(component->dev, "fault regs: CHAN=%02x, " + "GLOBAL1=%02x, GLOBAL2=%02x\n", + chan, global1, global2); + + regmap_write(rm, REG_DEVICE_CTRL_2, DCTRL2_MODE_HIZ); + } + mutex_unlock(&tas5805m->lock); + } + + return 0; +} + static const struct snd_soc_dapm_route tas5805m_audio_map[] = { { "DAC", NULL, "DAC IN" }, { "OUT", NULL, "DAC" }, @@ -354,7 +391,8 @@ static const struct snd_soc_dapm_route tas5805m_audio_map[] = { static const struct snd_soc_dapm_widget tas5805m_dapm_widgets[] = { SND_SOC_DAPM_AIF_IN("DAC IN", "Playback", 0, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_DAC("DAC", NULL, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_DAC_E("DAC", NULL, SND_SOC_NOPM, 0, 0, + tas5805m_dac_event, SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_OUTPUT("OUT") }; @@ -375,11 +413,14 @@ static int tas5805m_mute(struct snd_soc_dai *dai, int mute, int direction) struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); + mutex_lock(&tas5805m->lock); dev_dbg(component->dev, "set mute=%d (is_powered=%d)\n", mute, tas5805m->is_powered); + tas5805m->is_muted = mute; if (tas5805m->is_powered) - tas5805m_refresh(component); + tas5805m_refresh(tas5805m); + mutex_unlock(&tas5805m->lock); return 0; } @@ -434,6 +475,7 @@ static int tas5805m_i2c_probe(struct i2c_client *i2c) if (!tas5805m) return -ENOMEM; + tas5805m->i2c = i2c; tas5805m->pvdd = devm_regulator_get(dev, "pvdd"); if (IS_ERR(tas5805m->pvdd)) { dev_err(dev, "failed to get pvdd supply: %ld\n", @@ -507,6 +549,9 @@ static int tas5805m_i2c_probe(struct i2c_client *i2c) gpiod_set_value(tas5805m->gpio_pdn_n, 1); usleep_range(10000, 15000); + INIT_WORK(&tas5805m->work, do_work); + mutex_init(&tas5805m->lock); + /* Don't register through devm. We need to be able to unregister * the component prior to deasserting PDN# */ @@ -527,6 +572,7 @@ static void tas5805m_i2c_remove(struct i2c_client *i2c) struct device *dev = &i2c->dev; struct tas5805m_priv *tas5805m = dev_get_drvdata(dev); + cancel_work_sync(&tas5805m->work); snd_soc_unregister_component(dev); gpiod_set_value(tas5805m->gpio_pdn_n, 0); usleep_range(10000, 15000); From 84560e33cea5d7f95e51a5a2ffff7525bbef5e94 Mon Sep 17 00:00:00 2001 From: Daniel Beer Date: Thu, 27 Oct 2022 21:38:38 +1300 Subject: [PATCH 067/118] ASoC: tas5805m: add missing page switch. [ Upstream commit e0576cd642ced1ac65370b4516b7be9f536a0498 ] In tas5805m_refresh, we switch pages to update the DSP volume control, but we need to switch back to page 0 before trying to alter the soft-mute control. This latter page-switch was missing. Fixes: ec45268467f4 ("ASoC: add support for TAS5805M digital amplifier") Signed-off-by: Daniel Beer Link: https://lore.kernel.org/r/1fea38a71ea6ab0225d19ab28d1fa12828d762d0.1675497326.git.daniel.beer@igorinstitute.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas5805m.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/tas5805m.c b/sound/soc/codecs/tas5805m.c index 6e2edf045446..4e38eb7acea1 100644 --- a/sound/soc/codecs/tas5805m.c +++ b/sound/soc/codecs/tas5805m.c @@ -203,6 +203,9 @@ static void tas5805m_refresh(struct tas5805m_priv *tas5805m) set_dsp_scale(rm, 0x24, tas5805m->vol[0]); set_dsp_scale(rm, 0x28, tas5805m->vol[1]); + regmap_write(rm, REG_PAGE, 0x00); + regmap_write(rm, REG_BOOK, 0x00); + /* Set/clear digital soft-mute */ regmap_write(rm, REG_DEVICE_CTRL_2, (tas5805m->is_muted ? DCTRL2_MUTE : 0) | From 3b858f23cefb2398701459cf821b150523a791de Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 7 Feb 2023 17:04:24 +0800 Subject: [PATCH 068/118] ASoC: fsl_sai: fix getting version from VERID [ Upstream commit 29aab38823b61e482995c24644bd2d8acfe56185 ] The version information is at the bit31 ~ bit16 in the VERID register, so need to right shift 16bit to get it, otherwise the result of comparison "sai->verid.version >= 0x0301" is wrong. Fixes: 99c1e74f25d4 ("ASoC: fsl_sai: store full version instead of major/minor") Signed-off-by: Shengjiu Wang Reviewed-by: Iuliana Prodan Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/1675760664-25193-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_sai.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index e60c7b344562..8205b3217149 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1141,6 +1141,7 @@ static int fsl_sai_check_version(struct device *dev) sai->verid.version = val & (FSL_SAI_VERID_MAJOR_MASK | FSL_SAI_VERID_MINOR_MASK); + sai->verid.version >>= FSL_SAI_VERID_MINOR_SHIFT; sai->verid.feature = val & FSL_SAI_VERID_FEATURE_MASK; ret = regmap_read(sai->regmap, FSL_SAI_PARAM, &val); From dcfe5431fb0fcc130958f33472a6b8327e5cba8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Tue, 7 Feb 2023 22:04:28 +0100 Subject: [PATCH 069/118] ASoC: topology: Return -ENOMEM on memory allocation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c173ee5b2fa6195066674d66d1d7e191010fb1ff ] When handling error path, ret needs to be set to correct value. Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: d29d41e28eea ("ASoC: topology: Add support for multiple kcontrol types to a widget") Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230207210428.2076354-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-topology.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index c3be24b2fac5..a79a2fb260b8 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1401,13 +1401,17 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, template.num_kcontrols = le32_to_cpu(w->num_kcontrols); kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL); - if (!kc) + if (!kc) { + ret = -ENOMEM; goto hdr_err; + } kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int), GFP_KERNEL); - if (!kcontrol_type) + if (!kcontrol_type) { + ret = -ENOMEM; goto hdr_err; + } for (i = 0; i < le32_to_cpu(w->num_kcontrols); i++) { control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos; From 6353ebac4612a9f78f2e67adb6a568ae170599f7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Jan 2023 17:45:30 +0100 Subject: [PATCH 070/118] clk: microchip: mpfs-ccc: Use devm_kasprintf() for allocating formatted strings [ Upstream commit 86d884f5287f4369c198811aaa4931a3a11f36d2 ] In various places, string buffers of a fixed size are allocated, and filled using snprintf() with the same fixed size, which is error-prone. Replace this by calling devm_kasprintf() instead, which always uses the appropriate size. While at it, remove an unneeded intermediate variable, which allows us to drop a cast as a bonus. With the initial behavior it would have been possible to have a device tree with a node address that would make "ccc_pll" exceed 18 characters. If that happened, the would be cut off & both pll 0 & 1 would be named identically. If that happens, pll1 would fail to register. Thus, the fixes tag has been added to this commit. Fixes: d39fb172760e ("clk: microchip: add PolarFire SoC fabric clock support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Conor Dooley Tested-by: Conor Dooley [claudiu.beznea: added the rationale behind fixes tag] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/f904fd28b2087d1463ea65f059924e3b1acc193c.1672764239.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/clk/microchip/clk-mpfs-ccc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c index 32aae880a14f..0ddc73e07be4 100644 --- a/drivers/clk/microchip/clk-mpfs-ccc.c +++ b/drivers/clk/microchip/clk-mpfs-ccc.c @@ -164,12 +164,11 @@ static int mpfs_ccc_register_outputs(struct device *dev, struct mpfs_ccc_out_hw_ for (unsigned int i = 0; i < num_clks; i++) { struct mpfs_ccc_out_hw_clock *out_hw = &out_hws[i]; - char *name = devm_kzalloc(dev, 23, GFP_KERNEL); + char *name = devm_kasprintf(dev, GFP_KERNEL, "%s_out%u", parent->name, i); if (!name) return -ENOMEM; - snprintf(name, 23, "%s_out%u", parent->name, i); out_hw->divider.hw.init = CLK_HW_INIT_HW(name, &parent->hw, &clk_divider_ops, 0); out_hw->divider.reg = data->pll_base[i / MPFS_CCC_OUTPUTS_PER_PLL] + out_hw->reg_offset; @@ -201,14 +200,13 @@ static int mpfs_ccc_register_plls(struct device *dev, struct mpfs_ccc_pll_hw_clo for (unsigned int i = 0; i < num_clks; i++) { struct mpfs_ccc_pll_hw_clock *pll_hw = &pll_hws[i]; - char *name = devm_kzalloc(dev, 18, GFP_KERNEL); - if (!name) + pll_hw->name = devm_kasprintf(dev, GFP_KERNEL, "ccc%s_pll%u", + strchrnul(dev->of_node->full_name, '@'), i); + if (!pll_hw->name) return -ENOMEM; pll_hw->base = data->pll_base[i]; - snprintf(name, 18, "ccc%s_pll%u", strchrnul(dev->of_node->full_name, '@'), i); - pll_hw->name = (const char *)name; pll_hw->hw.init = CLK_HW_INIT_PARENTS_DATA_FIXED_SIZE(pll_hw->name, pll_hw->parents, &mpfs_ccc_pll_ops, 0); From b048b969b362405ac20334da87d282b60e34af02 Mon Sep 17 00:00:00 2001 From: Guodong Liu Date: Wed, 18 Jan 2023 14:21:16 +0800 Subject: [PATCH 071/118] pinctrl: mediatek: Fix the drive register definition of some Pins [ Upstream commit 5754a1c98b18009cb3030dc391aa37b77428a0bd ] The drive adjustment register definition of gpio13 and gpio81 is wrong: "the start address for the range" of gpio18 is corrected to 0x000, "the start bit for the first register within the range" of gpio81 is corrected to 24. Fixes: 6cf5e9ef362a ("pinctrl: add pinctrl driver on mt8195") Signed-off-by: Guodong Liu Link: https://lore.kernel.org/r/20230118062116.26315-1-Guodong.Liu@mediatek.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/pinctrl-mt8195.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c index 89557c7ed2ab..09c4dcef9338 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c @@ -659,7 +659,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { PIN_FIELD_BASE(10, 10, 4, 0x010, 0x10, 9, 3), PIN_FIELD_BASE(11, 11, 4, 0x000, 0x10, 24, 3), PIN_FIELD_BASE(12, 12, 4, 0x010, 0x10, 12, 3), - PIN_FIELD_BASE(13, 13, 4, 0x010, 0x10, 27, 3), + PIN_FIELD_BASE(13, 13, 4, 0x000, 0x10, 27, 3), PIN_FIELD_BASE(14, 14, 4, 0x010, 0x10, 15, 3), PIN_FIELD_BASE(15, 15, 4, 0x010, 0x10, 0, 3), PIN_FIELD_BASE(16, 16, 4, 0x010, 0x10, 18, 3), @@ -708,7 +708,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { PIN_FIELD_BASE(78, 78, 3, 0x000, 0x10, 15, 3), PIN_FIELD_BASE(79, 79, 3, 0x000, 0x10, 18, 3), PIN_FIELD_BASE(80, 80, 3, 0x000, 0x10, 21, 3), - PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 28, 3), + PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 24, 3), PIN_FIELD_BASE(82, 82, 3, 0x000, 0x10, 27, 3), PIN_FIELD_BASE(83, 83, 3, 0x010, 0x10, 0, 3), PIN_FIELD_BASE(84, 84, 3, 0x010, 0x10, 3, 3), From 9eab7b053947add8f9d6dcd78800b4db9adf66a9 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 20 Jan 2023 09:48:56 +1030 Subject: [PATCH 072/118] pinctrl: aspeed: Fix confusing types in return value [ Upstream commit 287a344a11f1ebd31055cf9b22c88d7005f108d7 ] The function signature is int, but we return a bool. Instead return a negative errno as the kerneldoc suggests. Fixes: 4d3d0e4272d8 ("pinctrl: Add core support for Aspeed SoCs") Signed-off-by: Joel Stanley Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20230119231856.52014-1-joel@jms.id.au Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index a30912a92f05..f93d6959cee9 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -113,7 +113,7 @@ static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx, int ret = 0; if (!exprs) - return true; + return -EINVAL; while (*exprs && !ret) { ret = aspeed_sig_expr_disable(ctx, *exprs); From bcc487001a15f71f103d102cba4ac8145d7a68f2 Mon Sep 17 00:00:00 2001 From: Maxim Korotkov Date: Fri, 18 Nov 2022 13:43:32 +0300 Subject: [PATCH 073/118] pinctrl: single: fix potential NULL dereference [ Upstream commit d2d73e6d4822140445ad4a7b1c6091e0f5fe703b ] Added checking of pointer "function" in pcs_set_mux(). pinmux_generic_get_function() can return NULL and the pointer "function" was dereferenced without checking against NULL. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 571aec4df5b7 ("pinctrl: single: Use generic pinmux helpers for managing functions") Signed-off-by: Maxim Korotkov Reviewed-by: Tony Lindgren Link: https://lore.kernel.org/r/20221118104332.943-1-korotkov.maxim.s@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-single.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 414ee6bb8ac9..9ad8f7020614 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -372,6 +372,8 @@ static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, if (!pcs->fmask) return 0; function = pinmux_generic_get_function(pctldev, fselector); + if (!function) + return -EINVAL; func = function->data; if (!func) return -EINVAL; From ebe63349cd48ae314040acbf54236c9d66be8c81 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 21:59:42 +0300 Subject: [PATCH 074/118] spi: dw: Fix wrong FIFO level setting for long xfers [ Upstream commit c63b8fd14a7db719f8252038a790638728c4eb66 ] Due to using the u16 type in the min_t() macros the SPI transfer length will be cast to word before participating in the conditional statement implied by the macro. Thus if the transfer length is greater than 64KB the Tx/Rx FIFO threshold level value will be determined by the leftover of the truncated after the type-case length. In the worst case it will cause the dramatical performance drop due to the "Tx FIFO Empty" or "Rx FIFO Full" interrupts triggered on each xfer word sent/received to/from the bus. The problem can be easily fixed by specifying the unsigned int type in the min_t() macros thus preventing the possible data loss. Fixes: ea11370fffdf ("spi: dw: get TX level without an additional variable") Reported-by: Sergey Nazarov Signed-off-by: Serge Semin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230113185942.2516-1-Sergey.Semin@baikalelectronics.ru Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-dw-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c index 99edddf9958b..c3bfb6c84cab 100644 --- a/drivers/spi/spi-dw-core.c +++ b/drivers/spi/spi-dw-core.c @@ -366,7 +366,7 @@ static void dw_spi_irq_setup(struct dw_spi *dws) * will be adjusted at the final stage of the IRQ-based SPI transfer * execution so not to lose the leftover of the incoming data. */ - level = min_t(u16, dws->fifo_len / 2, dws->tx_len); + level = min_t(unsigned int, dws->fifo_len / 2, dws->tx_len); dw_writel(dws, DW_SPI_TXFTLR, level); dw_writel(dws, DW_SPI_RXFTLR, level - 1); From 3319d7292568ada688613629c7ee13df580408b2 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 31 Jan 2023 08:38:45 +1030 Subject: [PATCH 075/118] pinctrl: aspeed: Revert "Force to disable the function's signal" [ Upstream commit 606d4ef4922662ded34aa7218288c3043ce0a41a ] This reverts commit cf517fef601b9dde151f0afc27164d13bf1fd907. The commit cf517fef601b ("pinctrl: aspeed: Force to disable the function's signal") exposed a problem with fetching the regmap for reading the GFX register. The Romulus machine the device tree contains a gpio hog for GPIO S7. With the patch applied: Muxing pin 151 for GPIO Disabling signal VPOB9 for VPO aspeed-g5-pinctrl 1e6e2080.pinctrl: Failed to acquire regmap for IP block 1 aspeed-g5-pinctrl 1e6e2080.pinctrl: request() failed for pin 151 The code path is aspeed-gpio -> pinmux-g5 -> regmap -> clk, and the of_clock code returns an error as it doesn't have a valid struct clk_hw pointer. The regmap call happens because pinmux wants to check the GFX node (IP block 1) to query bits there. For reference, before the offending patch: Muxing pin 151 for GPIO Disabling signal VPOB9 for VPO Want SCU8C[0x00000080]=0x1, got 0x0 from 0x00000000 Disabling signal VPOB9 for VPOOFF1 Want SCU8C[0x00000080]=0x1, got 0x0 from 0x00000000 Disabling signal VPOB9 for VPOOFF2 Want SCU8C[0x00000080]=0x1, got 0x0 from 0x00000000 Enabling signal GPIOS7 for GPIOS7 Muxed pin 151 as GPIOS7 gpio-943 (seq_cont): hogged as output/low We can't skip the clock check to allow pinmux to proceed, because the write to disable VPOB9 will try to set a bit in the GFX register space which will not stick when the IP is in reset. However, we do not want to enable the IP just so pinmux can do a disable-enable dance for the pin. For now, revert the offending patch while a correct solution is found. Fixes: cf517fef601b ("pinctrl: aspeed: Force to disable the function's signal") Link: https://github.com/openbmc/linux/issues/218 Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20230130220845.917985-1-joel@jms.id.au Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index f93d6959cee9..5a12fc7cf91f 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -92,10 +92,19 @@ static int aspeed_sig_expr_enable(struct aspeed_pinmux_data *ctx, static int aspeed_sig_expr_disable(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr) { + int ret; + pr_debug("Disabling signal %s for %s\n", expr->signal, expr->function); - return aspeed_sig_expr_set(ctx, expr, false); + ret = aspeed_sig_expr_eval(ctx, expr, true); + if (ret < 0) + return ret; + + if (ret) + return aspeed_sig_expr_set(ctx, expr, false); + + return 0; } /** From 33dd18fbbb2d3cf97a409daffba57968a1c8fa24 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Feb 2023 16:15:59 +0200 Subject: [PATCH 076/118] pinctrl: intel: Restore the pins that used to be in Direct IRQ mode [ Upstream commit a8520be3ffef3d25b53bf171a7ebe17ee0154175 ] If the firmware mangled the register contents too much, check the saved value for the Direct IRQ mode. If it matches, we will restore the pin state. Reported-by: Jim Minter Fixes: 6989ea4881c8 ("pinctrl: intel: Save and restore pins in "direct IRQ" mode") Tested-by: Jim Minter Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20230206141558.20916-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-intel.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 047a8374b4fd..954a41226740 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1676,6 +1676,12 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); #ifdef CONFIG_PM_SLEEP +static bool __intel_gpio_is_direct_irq(u32 value) +{ + return (value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO); +} + static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); @@ -1709,8 +1715,7 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. */ value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); - if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && - (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + if (__intel_gpio_is_direct_irq(value)) return true; return false; @@ -1840,7 +1845,12 @@ int intel_pinctrl_resume_noirq(struct device *dev) for (i = 0; i < pctrl->soc->npins; i++) { const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i]; - if (!intel_pinctrl_should_save(pctrl, desc->number)) + if (!(intel_pinctrl_should_save(pctrl, desc->number) || + /* + * If the firmware mangled the register contents too much, + * check the saved value for the Direct IRQ mode. + */ + __intel_gpio_is_direct_irq(pads[i].padcfg0))) continue; intel_restore_padcfg(pctrl, desc->number, PADCFG0, pads[i].padcfg0); From 3684a2f6affa1ca52a5d4a12f04d0652efdee65e Mon Sep 17 00:00:00 2001 From: ZhaoLong Wang Date: Mon, 6 Feb 2023 09:10:09 +0800 Subject: [PATCH 077/118] cifs: Fix use-after-free in rdata->read_into_pages() commit aa5465aeca3c66fecdf7efcf554aed79b4c4b211 upstream. When the network status is unstable, use-after-free may occur when read data from the server. BUG: KASAN: use-after-free in readpages_fill_pages+0x14c/0x7e0 Call Trace: dump_stack_lvl+0x38/0x4c print_report+0x16f/0x4a6 kasan_report+0xb7/0x130 readpages_fill_pages+0x14c/0x7e0 cifs_readv_receive+0x46d/0xa40 cifs_demultiplex_thread+0x121c/0x1490 kthread+0x16b/0x1a0 ret_from_fork+0x2c/0x50 Allocated by task 2535: kasan_save_stack+0x22/0x50 kasan_set_track+0x25/0x30 __kasan_kmalloc+0x82/0x90 cifs_readdata_direct_alloc+0x2c/0x110 cifs_readdata_alloc+0x2d/0x60 cifs_readahead+0x393/0xfe0 read_pages+0x12f/0x470 page_cache_ra_unbounded+0x1b1/0x240 filemap_get_pages+0x1c8/0x9a0 filemap_read+0x1c0/0x540 cifs_strict_readv+0x21b/0x240 vfs_read+0x395/0x4b0 ksys_read+0xb8/0x150 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 79: kasan_save_stack+0x22/0x50 kasan_set_track+0x25/0x30 kasan_save_free_info+0x2e/0x50 __kasan_slab_free+0x10e/0x1a0 __kmem_cache_free+0x7a/0x1a0 cifs_readdata_release+0x49/0x60 process_one_work+0x46c/0x760 worker_thread+0x2a4/0x6f0 kthread+0x16b/0x1a0 ret_from_fork+0x2c/0x50 Last potentially related work creation: kasan_save_stack+0x22/0x50 __kasan_record_aux_stack+0x95/0xb0 insert_work+0x2b/0x130 __queue_work+0x1fe/0x660 queue_work_on+0x4b/0x60 smb2_readv_callback+0x396/0x800 cifs_abort_connection+0x474/0x6a0 cifs_reconnect+0x5cb/0xa50 cifs_readv_from_socket.cold+0x22/0x6c cifs_read_page_from_socket+0xc1/0x100 readpages_fill_pages.cold+0x2f/0x46 cifs_readv_receive+0x46d/0xa40 cifs_demultiplex_thread+0x121c/0x1490 kthread+0x16b/0x1a0 ret_from_fork+0x2c/0x50 The following function calls will cause UAF of the rdata pointer. readpages_fill_pages cifs_read_page_from_socket cifs_readv_from_socket cifs_reconnect __cifs_reconnect cifs_abort_connection mid->callback() --> smb2_readv_callback queue_work(&rdata->work) # if the worker completes first, # the rdata is freed cifs_readv_complete kref_put cifs_readdata_release kfree(rdata) return rdata->... # UAF in readpages_fill_pages() Similarly, this problem also occurs in the uncache_fill_pages(). Fix this by adjusts the order of condition judgment in the return statement. Signed-off-by: ZhaoLong Wang Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 209dfc06fd6d..542f22db5f46 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3880,7 +3880,7 @@ uncached_fill_pages(struct TCP_Server_Info *server, rdata->got_bytes += result; } - return rdata->got_bytes > 0 && result != -ECONNABORTED ? + return result != -ECONNABORTED && rdata->got_bytes > 0 ? rdata->got_bytes : result; } @@ -4656,7 +4656,7 @@ readpages_fill_pages(struct TCP_Server_Info *server, rdata->got_bytes += result; } - return rdata->got_bytes > 0 && result != -ECONNABORTED ? + return result != -ECONNABORTED && rdata->got_bytes > 0 ? rdata->got_bytes : result; } From 0d2cf3fae701646061e295815bb7588d2f3671cc Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 3 Feb 2023 14:32:09 -0500 Subject: [PATCH 078/118] net: USB: Fix wrong-direction WARNING in plusb.c commit 811d581194f7412eda97acc03d17fc77824b561f upstream. The syzbot fuzzer detected a bug in the plusb network driver: A zero-length control-OUT transfer was treated as a read instead of a write. In modern kernels this error provokes a WARNING: usb 1-1: BOGUS control dir, pipe 80000280 doesn't match bRequestType c0 WARNING: CPU: 0 PID: 4645 at drivers/usb/core/urb.c:411 usb_submit_urb+0x14a7/0x1880 drivers/usb/core/urb.c:411 Modules linked in: CPU: 1 PID: 4645 Comm: dhcpcd Not tainted 6.2.0-rc6-syzkaller-00050-g9f266ccaa2f5 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 RIP: 0010:usb_submit_urb+0x14a7/0x1880 drivers/usb/core/urb.c:411 ... Call Trace: usb_start_wait_urb+0x101/0x4b0 drivers/usb/core/message.c:58 usb_internal_control_msg drivers/usb/core/message.c:102 [inline] usb_control_msg+0x320/0x4a0 drivers/usb/core/message.c:153 __usbnet_read_cmd+0xb9/0x390 drivers/net/usb/usbnet.c:2010 usbnet_read_cmd+0x96/0xf0 drivers/net/usb/usbnet.c:2068 pl_vendor_req drivers/net/usb/plusb.c:60 [inline] pl_set_QuickLink_features drivers/net/usb/plusb.c:75 [inline] pl_reset+0x2f/0xf0 drivers/net/usb/plusb.c:85 usbnet_open+0xcc/0x5d0 drivers/net/usb/usbnet.c:889 __dev_open+0x297/0x4d0 net/core/dev.c:1417 __dev_change_flags+0x587/0x750 net/core/dev.c:8530 dev_change_flags+0x97/0x170 net/core/dev.c:8602 devinet_ioctl+0x15a2/0x1d70 net/ipv4/devinet.c:1147 inet_ioctl+0x33f/0x380 net/ipv4/af_inet.c:979 sock_do_ioctl+0xcc/0x230 net/socket.c:1169 sock_ioctl+0x1f8/0x680 net/socket.c:1286 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x197/0x210 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The fix is to call usbnet_write_cmd() instead of usbnet_read_cmd() and remove the USB_DIR_IN flag. Reported-and-tested-by: syzbot+2a0e7abd24f1eb90ce25@syzkaller.appspotmail.com Signed-off-by: Alan Stern Fixes: 090ffa9d0e90 ("[PATCH] USB: usbnet (9/9) module for pl2301/2302 cables") CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/00000000000052099f05f3b3e298@google.com/ Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/plusb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 2c82fbcaab22..7a2b0094de51 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -57,9 +57,7 @@ static inline int pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index) { - return usbnet_read_cmd(dev, req, - USB_DIR_IN | USB_TYPE_VENDOR | - USB_RECIP_DEVICE, + return usbnet_write_cmd(dev, req, USB_TYPE_VENDOR | USB_RECIP_DEVICE, val, index, NULL, 0); } From e266ad04ec3993244b663303e3d2281524bb1732 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Feb 2023 14:04:13 +0100 Subject: [PATCH 079/118] mptcp: do not wait for bare sockets' timeout commit d4e85922e3e7ef2071f91f65e61629b60f3a9cf4 upstream. If the peer closes all the existing subflows for a given mptcp socket and later the application closes it, the current implementation let it survive until the timewait timeout expires. While the above is allowed by the protocol specification it consumes resources for almost no reason and additionally causes sporadic self-tests failures. Let's move the mptcp socket to the TCP_CLOSE state when there are no alive subflows at close time, so that the allocated resources will be freed immediately. Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 29849d77e4bf..938cccab331d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2908,6 +2908,7 @@ bool __mptcp_close(struct sock *sk, long timeout) struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); bool do_cancel_work = false; + int subflows_alive = 0; sk->sk_shutdown = SHUTDOWN_MASK; @@ -2933,6 +2934,8 @@ cleanup: struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); + subflows_alive += ssk->sk_state != TCP_CLOSE; + /* since the close timeout takes precedence on the fail one, * cancel the latter */ @@ -2948,6 +2951,12 @@ cleanup: } sock_orphan(sk); + /* all the subflows are closed, only timeout can change the msk + * state, let's not keep resources busy for no reasons + */ + if (subflows_alive == 0) + inet_sk_state_store(sk, TCP_CLOSE); + sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); if (mptcp_sk(sk)->token) From ba36772a1cc94a70e55942c049b2e33fc482ff99 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Feb 2023 14:04:16 +0100 Subject: [PATCH 080/118] mptcp: be careful on subflow status propagation on errors commit 1249db44a102d9d3541ed7798d4b01ffdcf03524 upstream. Currently the subflow error report callback unconditionally propagates the fallback subflow status to the owning msk. If the msk is already orphaned, the above prevents the code from correctly tracking the msk moving to the TCP_CLOSE state and doing the appropriate cleanup. All the above causes increasing memory usage over time and sporadic self-tests failures. There is a great deal of infrastructure trying to propagate correctly the fallback subflow status to the owning mptcp socket, e.g. via mptcp_subflow_eof() and subflow_sched_work_if_closed(): in the error propagation path we need only to cope with unorphaned sockets. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/339 Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/subflow.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 5220435d8e34..929b0ee8b3d5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1344,6 +1344,7 @@ void __mptcp_error_report(struct sock *sk) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int err = sock_error(ssk); + int ssk_state; if (!err) continue; @@ -1354,7 +1355,14 @@ void __mptcp_error_report(struct sock *sk) if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) continue; - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); sk->sk_err = -err; /* This barrier is coupled with smp_rmb() in mptcp_poll() */ From b7692fb9a2b02270612f29041dde63dba73841ad Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Feb 2023 14:04:17 +0100 Subject: [PATCH 081/118] selftests: mptcp: allow more slack for slow test-case commit a635a8c3df66ab68dc088c08a4e9e955e22c0e64 upstream. A test-case is frequently failing on some extremely slow VMs. The mptcp transfer completes before the script is able to do all the required PM manipulation. Address the issue in the simplest possible way, making the transfer even more slow. Additionally dump more info in case of failures, to help debugging similar problems in the future and init dump_stats var. Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/323 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2eeaf4aca644..85d5535b3b72 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1688,6 +1688,7 @@ chk_subflow_nr() local subflow_nr=$3 local cnt1 local cnt2 + local dump_stats if [ -n "${need_title}" ]; then printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" @@ -1705,7 +1706,12 @@ chk_subflow_nr() echo "[ ok ]" fi - [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint ) + if [ "${dump_stats}" = 1 ]; then + ss -N $ns1 -tOni + ss -N $ns1 -tOni | grep token + ip -n $ns1 mptcp endpoint + dump_stats + fi } chk_link_usage() @@ -3005,7 +3011,7 @@ endpoint_tests() pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 & wait_mpj $ns2 pm_nl_del_endpoint $ns2 2 10.0.2.2 From f676c8a695880f71624ada09183b5009829626ef Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 7 Feb 2023 14:04:18 +0100 Subject: [PATCH 082/118] selftests: mptcp: stop tests earlier commit 070d6dafacbaa9d1f2e4e3edc263853d194af15e upstream. These 'endpoint' tests from 'mptcp_join.sh' selftest start a transfer in the background and check the status during this transfer. Once the expected events have been recorded, there is no reason to wait for the data transfer to finish. It can be stopped earlier to reduce the execution time by more than half. For these tests, the exchanged data were not verified. Errors, if any, were ignored but that's fine, plenty of other tests are looking at that. It is then OK to mute stderr now that we are sure errors will be printed (and still ignored) because the transfer is stopped before the end. Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 85d5535b3b72..76a197f7b813 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -472,6 +472,12 @@ kill_wait() wait $1 2>/dev/null } +kill_tests_wait() +{ + kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) + wait +} + pm_nl_set_limits() { local ns=$1 @@ -2991,7 +2997,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null & wait_mpj $ns1 pm_nl_check_endpoint 1 "creation" \ @@ -3004,14 +3010,14 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint 0 "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - wait + kill_tests_wait fi if reset "delete and re-add"; then pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 & + run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & wait_mpj $ns2 pm_nl_del_endpoint $ns2 2 10.0.2.2 @@ -3021,7 +3027,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 - wait + kill_tests_wait fi } From 1ab4bed3fc8bf279eeb0865936c56bef49759a57 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:39 +0000 Subject: [PATCH 083/118] btrfs: simplify update of last_dir_index_offset when logging a directory commit 6afaed53cc9adde69d8a76ff5b4d740d5efbc54c upstream. When logging a directory, we always set the inode's last_dir_index_offset to the offset of the last dir index item we found. This is using an extra field in the log context structure, and it makes more sense to update it only after we insert dir index items, and we could directly update the inode's last_dir_index_offset field instead. So make this simpler by updating the inode's last_dir_index_offset only when we actually insert dir index keys in the log tree, and getting rid of the last_dir_item_offset field in the log context structure. Reported-by: David Arendt Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ Reported-by: Maxim Mikityanskiy Link: https://lore.kernel.org/linux-btrfs/Y8voyTXdnPDz8xwY@mail.gmail.com/ Reported-by: Hunter Wardlaw Link: https://bugzilla.suse.com/show_bug.cgi?id=1207231 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216851 CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 23 +++++++++++++++++------ fs/btrfs/tree-log.h | 2 -- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7535857f4c8f..e71464c0e466 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3607,17 +3607,19 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, } static int flush_dir_items_batch(struct btrfs_trans_handle *trans, - struct btrfs_root *log, + struct btrfs_inode *inode, struct extent_buffer *src, struct btrfs_path *dst_path, int start_slot, int count) { + struct btrfs_root *log = inode->root->log_root; char *ins_data = NULL; struct btrfs_item_batch batch; struct extent_buffer *dst; unsigned long src_offset; unsigned long dst_offset; + u64 last_index; struct btrfs_key key; u32 item_size; int ret; @@ -3675,6 +3677,19 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1); copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size); btrfs_release_path(dst_path); + + last_index = batch.keys[count - 1].offset; + ASSERT(last_index > inode->last_dir_index_offset); + + /* + * If for some unexpected reason the last item's index is not greater + * than the last index we logged, warn and return an error to fallback + * to a transaction commit. + */ + if (WARN_ON(last_index <= inode->last_dir_index_offset)) + ret = -EUCLEAN; + else + inode->last_dir_index_offset = last_index; out: kfree(ins_data); @@ -3724,7 +3739,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, } di = btrfs_item_ptr(src, i, struct btrfs_dir_item); - ctx->last_dir_item_offset = key.offset; /* * Skip ranges of items that consist only of dir item keys created @@ -3787,7 +3801,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, if (batch_size > 0) { int ret; - ret = flush_dir_items_batch(trans, log, src, dst_path, + ret = flush_dir_items_batch(trans, inode, src, dst_path, batch_start, batch_size); if (ret < 0) return ret; @@ -4075,7 +4089,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = BTRFS_DIR_START_INDEX; max_key = 0; - ctx->last_dir_item_offset = inode->last_dir_index_offset; while (1) { ret = log_dir_items(trans, inode, path, dst_path, @@ -4087,8 +4100,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = max_key + 1; } - inode->last_dir_index_offset = ctx->last_dir_item_offset; - return 0; } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index aed1e05e9879..bcca74128c3b 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -23,8 +23,6 @@ struct btrfs_log_ctx { bool logging_new_delayed_dentries; /* Indicate if the inode being logged was logged before. */ bool logged_before; - /* Tracks the last logged dir item/index key offset. */ - u64 last_dir_item_offset; struct inode *inode; struct list_head list; /* Only used for fast fsyncs. */ From 51aa10112b12ff353b18d8bfc6e1eaff68142c81 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 20 Jan 2023 21:47:16 +0800 Subject: [PATCH 084/118] btrfs: free device in btrfs_close_devices for a single device filesystem commit 5f58d783fd7823b2c2d5954d1126e702f94bfc4c upstream. We have this check to make sure we don't accidentally add older devices that may have disappeared and re-appeared with an older generation from being added to an fs_devices (such as a replace source device). This makes sense, we don't want stale disks in our file system. However for single disks this doesn't really make sense. I've seen this in testing, but I was provided a reproducer from a project that builds btrfs images on loopback devices. The loopback device gets cached with the new generation, and then if it is re-used to generate a new file system we'll fail to mount it because the new fs is "older" than what we have in cache. Fix this by freeing the cache when closing the device for a single device filesystem. This will ensure that the mount command passed device path is scanned successfully during the next mount. CC: stable@vger.kernel.org # 5.10+ Reported-by: Daan De Meyer Signed-off-by: Josef Bacik Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f66bf1a88edf..05f9cbbf6e1e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -408,6 +408,7 @@ void btrfs_free_device(struct btrfs_device *device) static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; + WARN_ON(fs_devices->opened); while (!list_empty(&fs_devices->devices)) { device = list_entry(fs_devices->devices.next, @@ -1194,9 +1195,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); close_fs_devices(fs_devices); - if (!fs_devices->opened) + if (!fs_devices->opened) { list_splice_init(&fs_devices->seed_list, &list); + /* + * If the struct btrfs_fs_devices is not assembled with any + * other device, it can be re-initialized during the next mount + * without the needing device-scan step. Therefore, it can be + * fully freed. + */ + if (fs_devices->num_devices == 1) { + list_del(&fs_devices->fs_list); + free_fs_devices(fs_devices); + } + } + + list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) { close_fs_devices(fs_devices); list_del(&fs_devices->seed_list); From db9c4a94b6291dc166e77e56951b9e7256ecc0e9 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Wed, 8 Feb 2023 13:12:23 -0500 Subject: [PATCH 085/118] usb: core: add quirk for Alcor Link AK9563 smartcard reader commit 303e724d7b1e1a0a93daf0b1ab5f7c4f53543b34 upstream. The Alcor Link AK9563 smartcard reader used on some Lenovo platforms doesn't work. If LPM is enabled the reader will provide an invalid usb config descriptor. Added quirk to disable LPM. Verified fix on Lenovo P16 G1 and T14 G3 Tested-by: Miroslav Zatko Tested-by: Dennis Wassenberg Cc: stable@vger.kernel.org Signed-off-by: Dennis Wassenberg Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230208181223.1092654-1-mpearson-lenovo@squebb.ca Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 079e183cf3bf..934b3d997702 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -526,6 +526,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */ + { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM }, + /* DELL USB GEN2 */ { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, From 01815a1caf2fec67910a7fb108241b22699e6590 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 8 Feb 2023 20:53:19 +0000 Subject: [PATCH 086/118] usb: typec: altmodes/displayport: Fix probe pin assign check commit 54e5c00a4eb0a4c663445b245f641bbfab142430 upstream. While checking Pin Assignments of the port and partner during probe, we don't take into account whether the peripheral is a plug or receptacle. This manifests itself in a mode entry failure on certain docks and dongles with captive cables. For instance, the Startech.com Type-C to DP dongle (Model #CDP2DP) advertises its DP VDO as 0x405. This would fail the Pin Assignment compatibility check, despite it supporting Pin Assignment C as a UFP. Update the check to use the correct DP Pin Assign macros that take the peripheral's receptacle bit into account. Fixes: c1e5c2f0cb8a ("usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles") Cc: stable@vger.kernel.org Reported-by: Diana Zigterman Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20230208205318.131385-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 80d8c6c3be36..3a42313d0d66 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -535,10 +535,10 @@ int dp_altmode_probe(struct typec_altmode *alt) /* FIXME: Port can only be DFP_U. */ /* Make sure we have compatiple pin configurations */ - if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) & - DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) && - !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) & - DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo))) + if (!(DP_CAP_PIN_ASSIGN_DFP_D(port->vdo) & + DP_CAP_PIN_ASSIGN_UFP_D(alt->vdo)) && + !(DP_CAP_PIN_ASSIGN_UFP_D(port->vdo) & + DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo))) return -ENODEV; ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group); From a04c7d062b537ff787d00da95bdfe343260d4beb Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 15 Dec 2022 17:09:14 +0000 Subject: [PATCH 087/118] cxl/region: Fix null pointer dereference for resetting decoder commit 4fa4302d6dc7de7e8e74dc7405611a2efb4bf54b upstream. Not all decoders have a reset callback. The CXL specification allows a host bridge with a single root port to have no explicit HDM decoders. Currently the region driver assumes there are none. As such the CXL core creates a special pass through decoder instance without a commit/reset callback. Prior to this patch, the ->reset() callback was called unconditionally when calling cxl_region_decode_reset. Thus a configuration with 1 Host Bridge, 1 Root Port, and one directly attached CXL type 3 device or multiple CXL type 3 devices attached to downstream ports of a switch can cause a null pointer dereference. Before the fix, a kernel crash was observed when we destroy the region, and a pass through decoder is reset. The issue can be reproduced as below, 1) create a region with a CXL setup which includes a HB with a single root port under which a memdev is attached directly. 2) destroy the region with cxl destroy-region regionX -f. Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") Cc: Signed-off-by: Fan Ni Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Tested-by: Gregory Price Reviewed-by: Gregory Price Link: https://lore.kernel.org/r/20221215170909.2650271-1-fan.ni@samsung.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c4f32c32dfd5..36962d1c2351 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -131,7 +131,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *iter = cxled_to_port(cxled); struct cxl_ep *ep; - int rc; + int rc = 0; while (!is_cxl_root(to_cxl_port(iter->dev.parent))) iter = to_cxl_port(iter->dev.parent); @@ -143,7 +143,8 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - rc = cxld->reset(cxld); + if (cxld->reset) + rc = cxld->reset(cxld); if (rc) return rc; } @@ -186,7 +187,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - cxld->reset(cxld); + if (cxld->reset) + cxld->reset(cxld); } cxled->cxld.reset(&cxled->cxld); From 15f9f8eb3b8b1d1831899b4ffb816a0ecc57f044 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 7 Feb 2023 11:04:30 -0800 Subject: [PATCH 088/118] cxl/region: Fix passthrough-decoder detection commit 711442e29f16f0d39dd0e2460c9baacfccb9d5a7 upstream. A passthrough decoder is a decoder that maps only 1 target. It is a special case because it does not impose any constraints on the interleave-math as compared to a decoder with multiple targets. Extend the passthrough case to multi-target-capable decoders that only have one target selected. I.e. the current code was only considering passthrough *ports* which are only a subset of the potential passthrough decoder scenarios. Fixes: e4f6dfa9ef75 ("cxl/region: Fix 'distance' calculation with passthrough ports") Cc: Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167564540422.847146.13816934143225777888.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 36962d1c2351..9709bbf773b7 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -993,10 +993,10 @@ static int cxl_port_setup_targets(struct cxl_port *port, int i, distance; /* - * Passthrough ports impose no distance requirements between + * Passthrough decoders impose no distance requirements between * peers */ - if (port->nr_dports == 1) + if (cxl_rr->nr_targets == 1) distance = 0; else distance = p->nr_targets / cxl_rr->nr_targets; From ab6fd64dda2ba0b2fae1a34fe68edf1769c16c4a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 14 Dec 2022 13:37:04 +0100 Subject: [PATCH 089/118] clk: ingenic: jz4760: Update M/N/OD calculation algorithm commit ecfb9f404771dde909ce7743df954370933c3be2 upstream. The previous algorithm was pretty broken. - The inner loop had a '(m > m_max)' condition, and the value of 'm' would increase in each iteration; - Each iteration would actually multiply 'm' by two, so it is not needed to re-compute the whole equation at each iteration; - It would loop until (m & 1) == 0, which means it would loop at most once. - The outer loop would divide the 'n' value by two at the end of each iteration. This meant that for a 12 MHz parent clock and a 1.2 GHz requested clock, it would first try n=12, then n=6, then n=3, then n=1, none of which would work; the only valid value is n=2 in this case. Simplify this algorithm with a single for loop, which decrements 'n' after each iteration, addressing all of the above problems. Fixes: bdbfc029374f ("clk: ingenic: Add support for the JZ4760") Cc: Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221214123704.7305-1-paul@crapouillou.net Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ingenic/jz4760-cgu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c index ecd395ac8a28..e407f00bd594 100644 --- a/drivers/clk/ingenic/jz4760-cgu.c +++ b/drivers/clk/ingenic/jz4760-cgu.c @@ -58,7 +58,7 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info, unsigned long rate, unsigned long parent_rate, unsigned int *pm, unsigned int *pn, unsigned int *pod) { - unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 2; + unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 1; /* The frequency after the N divider must be between 1 and 50 MHz. */ n = parent_rate / (1 * MHZ); @@ -66,19 +66,17 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info, /* The N divider must be >= 2. */ n = clamp_val(n, 2, 1 << pll_info->n_bits); - for (;; n >>= 1) { - od = (unsigned int)-1; + rate /= MHZ; + parent_rate /= MHZ; - do { - m = (rate / MHZ) * (1 << ++od) * n / (parent_rate / MHZ); - } while ((m > m_max || m & 1) && (od < 4)); - - if (od < 4 && m >= 4 && m <= m_max) - break; + for (m = m_max; m >= m_max && n >= 2; n--) { + m = rate * n / parent_rate; + od = m & 1; + m <<= od; } *pm = m; - *pn = n; + *pn = n + 1; *pod = 1 << od; } From 7d2cd4736d1aba688d25801623ffead12caed768 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 3 Feb 2023 17:50:54 +0100 Subject: [PATCH 090/118] pinctrl: qcom: sm8450-lpass-lpi: correct swr_rx_data group commit 5921b250f43870e7d8044ca14e402292ceb3e3a8 upstream. According to hardware programming guide, the swr_rx_data pin group has only two pins (GPIO5 and GPIO6). This is also visible in "struct sm8450_groups" in the driver - GPIO15 does not have swr_rx_data function. Fixes: ec1652fc4d56 ("pinctrl: qcom: Add sm8450 lpass lpi pinctrl driver") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230203165054.390762-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c index c3c8c34148f1..e22d03ce292e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c @@ -105,7 +105,7 @@ static const struct pinctrl_pin_desc sm8450_lpi_pins[] = { static const char * const swr_tx_clk_groups[] = { "gpio0" }; static const char * const swr_tx_data_groups[] = { "gpio1", "gpio2", "gpio14" }; static const char * const swr_rx_clk_groups[] = { "gpio3" }; -static const char * const swr_rx_data_groups[] = { "gpio4", "gpio5", "gpio15" }; +static const char * const swr_rx_data_groups[] = { "gpio4", "gpio5" }; static const char * const dmic1_clk_groups[] = { "gpio6" }; static const char * const dmic1_data_groups[] = { "gpio7" }; static const char * const dmic2_clk_groups[] = { "gpio8" }; From 62890f3eabf80676c0fefbbdd88e0f3f12528032 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 3 Feb 2023 15:33:59 +0800 Subject: [PATCH 091/118] drm/amd/pm: add SMU 13.0.7 missing GetPptLimit message mapping commit 0e763afcb50814e256ecb780fcc0f3bade2e1a0c upstream. Add missing GetPptLimit message mapping. Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index eea06939e7da..5dfeab7b999b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -124,6 +124,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), + MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { From a89868254cd2079b800361e68f39ac4e57293080 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 7 Feb 2023 13:04:52 +0800 Subject: [PATCH 092/118] ceph: flush cap releases when the session is flushed commit e7d84c6a1296d059389f7342d9b4b7defb518d3a upstream. MDS expects the completed cap release prior to responding to the session flush for cache drop. Cc: stable@vger.kernel.org Link: http://tracker.ceph.com/issues/38009 Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 26a0a8b9975e..756560df3bdb 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3662,6 +3662,12 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_FLUSHMSG: + /* flush cap releases */ + spin_lock(&session->s_cap_lock); + if (session->s_num_cap_releases) + ceph_flush_cap_releases(mdsc, session); + spin_unlock(&session->s_cap_lock); + send_flushmsg_ack(mdsc, session, seq); break; From 9f55a0a2d94fd731e396b8586be6babca49dfd92 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 25 Jan 2023 12:23:46 -0800 Subject: [PATCH 093/118] nvdimm: Support sizeof(struct page) > MAX_STRUCT_PAGE_SIZE commit c91d713630848460de8669e6570307b7e559863b upstream. Commit 6e9f05dc66f9 ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE") ...updated MAX_STRUCT_PAGE_SIZE to account for sizeof(struct page) potentially doubling in the case of CONFIG_KMSAN=y. Unfortunately this doubles the amount of capacity stolen from user addressable capacity for everyone, regardless of whether they are using the debug option. Revert that change, mandate that MAX_STRUCT_PAGE_SIZE never exceed 64, but allow for debug scenarios to proceed with creating debug sized page maps with a compile option to support debug scenarios. Note that this only applies to cases where the page map is permanent, i.e. stored in a reservation of the pmem itself ("--map=dev" in "ndctl create-namespace" terms). For the "--map=mem" case, since the allocation is ephemeral for the lifespan of the namespace, there are no explicit restriction. However, the implicit restriction, of having enough available "System RAM" to store the page map for the typically large pmem, still applies. Fixes: 6e9f05dc66f9 ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE") Cc: Cc: Alexander Potapenko Cc: Marco Elver Reported-by: Jeff Moyer Acked-by: Yu Zhao Link: https://lore.kernel.org/r/167467815773.463042.7022545814443036382.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/Kconfig | 19 ++++++++++++++++++ drivers/nvdimm/nd.h | 2 +- drivers/nvdimm/pfn_devs.c | 42 +++++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 5a29046e3319..e4c20f0cb049 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -102,6 +102,25 @@ config NVDIMM_KEYS depends on ENCRYPTED_KEYS depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m +config NVDIMM_KMSAN + bool + depends on KMSAN + help + KMSAN, and other memory debug facilities, increase the size of + 'struct page' to contain extra metadata. This collides with + the NVDIMM capability to store a potentially + larger-than-"System RAM" size 'struct page' array in a + reservation of persistent memory rather than limited / + precious DRAM. However, that reservation needs to persist for + the life of the given NVDIMM namespace. If you are using KMSAN + to debug an issue unrelated to NVDIMMs or DAX then say N to this + option. Otherwise, say Y but understand that any namespaces + (with the page array stored pmem) created with this build of + the kernel will permanently reserve and strand excess + capacity compared to the CONFIG_KMSAN=n case. + + Select N if unsure. + config NVDIMM_TEST_BUILD tristate "Build the unit test core" depends on m diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 85ca5b4da3cf..ec5219680092 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -652,7 +652,7 @@ void devm_namespace_disable(struct device *dev, struct nd_namespace_common *ndns); #if IS_ENABLED(CONFIG_ND_CLAIM) /* max struct page size independent of kernel config */ -#define MAX_STRUCT_PAGE_SIZE 128 +#define MAX_STRUCT_PAGE_SIZE 64 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); #else static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 61af072ac98f..af7d9301520c 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -13,6 +13,8 @@ #include "pfn.h" #include "nd.h" +static const bool page_struct_override = IS_ENABLED(CONFIG_NVDIMM_KMSAN); + static void nd_pfn_release(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); @@ -758,12 +760,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) return -ENXIO; } - /* - * Note, we use 64 here for the standard size of struct page, - * debugging options may cause it to be larger in which case the - * implementation will limit the pfns advertised through - * ->direct_access() to those that are included in the memmap. - */ start = nsio->res.start; size = resource_size(&nsio->res); npfns = PHYS_PFN(size - SZ_8K); @@ -782,20 +778,33 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) } end_trunc = start + size - ALIGN_DOWN(start + size, align); if (nd_pfn->mode == PFN_MODE_PMEM) { + unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns; + /* * The altmap should be padded out to the block size used * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. * - * Also make sure size of struct page is less than 128. We - * want to make sure we use large enough size here so that - * we don't have a dynamic reserve space depending on - * struct page size. But we also want to make sure we notice - * when we end up adding new elements to struct page. + * Also make sure size of struct page is less than + * MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the + * face of production kernel configurations that reduce the + * 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug + * kernel configurations that increase the 'struct page' size + * above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows + * for continuing with the capacity that will be wasted when + * reverting to a production kernel configuration. Otherwise, + * those configurations are blocked by default. */ - BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE); - offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align) - - start; + if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) { + if (page_struct_override) + page_map_size = sizeof(struct page) * npfns; + else { + dev_err(&nd_pfn->dev, + "Memory debug options prevent using pmem for the page map\n"); + return -EINVAL; + } + } + offset = ALIGN(start + SZ_8K + page_map_size, align) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) offset = ALIGN(start + SZ_8K, align) - start; else @@ -818,7 +827,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) pfn_sb->version_minor = cpu_to_le16(4); pfn_sb->end_trunc = cpu_to_le32(end_trunc); pfn_sb->align = cpu_to_le32(nd_pfn->align); - pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); + if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override) + pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page)); + else + pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); From 7d151eccd07a0a9767f3e0af976e05a9210ea678 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 26 Jan 2023 22:53:06 -0500 Subject: [PATCH 094/118] riscv: Fixup race condition on PG_dcache_clean in flush_icache_pte commit 950b879b7f0251317d26bae0687e72592d607532 upstream. In commit 588a513d3425 ("arm64: Fix race condition on PG_dcache_clean in __sync_icache_dcache()"), we found RISC-V has the same issue as the previous arm64. The previous implementation didn't guarantee the correct sequence of operations, which means flush_icache_all() hasn't been called when the PG_dcache_clean was set. That would cause a risk of page synchronization. Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230127035306.1819561-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/mm/cacheflush.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 57b40a350420..8a2e7040f8fc 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -83,8 +83,10 @@ void flush_icache_pte(pte_t pte) { struct page *page = pte_page(pte); - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + if (!test_bit(PG_dcache_clean, &page->flags)) { flush_icache_all(); + set_bit(PG_dcache_clean, &page->flags); + } } #endif /* CONFIG_MMU */ From 026cae99d06409752145e2472c1b264cc5fb6bf1 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 4 Feb 2023 01:35:31 -0500 Subject: [PATCH 095/118] riscv: kprobe: Fixup misaligned load text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit eb7423273cc9922ee2d05bf660c034d7d515bb91 upstream. The current kprobe would cause a misaligned load for the probe point. This patch fixup it with two half-word loads instead. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/linux-riscv/878rhig9zj.fsf@all.your.base.are.belong.to.us/ Reported-by: Bjorn Topel Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230204063531.740220-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/probes/kprobes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 388ecada500c..cca2b3a2135a 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -65,16 +65,18 @@ static bool __kprobes arch_check_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { - unsigned long probe_addr = (unsigned long)p->addr; + u16 *insn = (u16 *)p->addr; - if (probe_addr & 0x1) + if ((unsigned long)insn & 0x1) return -EILSEQ; if (!arch_check_kprobe(p)) return -EILSEQ; /* copy instruction */ - p->opcode = *p->addr; + p->opcode = (kprobe_opcode_t)(*insn++); + if (GET_INSN_LENGTH(p->opcode) == 4) + p->opcode |= (kprobe_opcode_t)(*insn) << 16; /* decode instruction */ switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) { From 6f097c24815e67909a1fcc2c605586d02babd673 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 6 Feb 2023 14:22:40 +1000 Subject: [PATCH 096/118] powerpc/64s/interrupt: Fix interrupt exit race with security mitigation switch commit 2ea31e2e62bbc4d11c411eeb36f1b02841dbcab1 upstream. The RFI and STF security mitigation options can flip the interrupt_exit_not_reentrant static branch condition concurrently with the interrupt exit code which tests that branch. Interrupt exit tests this condition to set MSR[EE|RI] for exit, then again in the case a soft-masked interrupt is found pending, to recover the MSR so the interrupt can be replayed before attempting to exit again. If the condition changes between these two tests, the MSR and irq soft-mask state will become corrupted, leading to warnings and possible crashes. For example, if the branch is initially true then false, MSR[EE] will be 0 but PACA_IRQ_HARD_DIS clear and EE may not get enabled, leading to warnings in irq_64.c. Fixes: 13799748b957 ("powerpc/64: use interrupt restart table to speed up return from interrupt") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Sachin Sant Tested-by: Sachin Sant Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206042240.92103-1-npiggin@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/interrupt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index fc6631a80527..0ec1581619db 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -50,16 +50,18 @@ static inline bool exit_must_hard_disable(void) */ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) { + bool must_hard_disable = (exit_must_hard_disable() || !restartable); + /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); - if (exit_must_hard_disable() || !restartable) + if (must_hard_disable) __hard_EE_RI_disable(); #ifdef CONFIG_PPC64 /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable() || !restartable) { + if (must_hard_disable) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } From a2e60fee4b7681bb9664f6a638d9630102ac978b Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Thu, 2 Feb 2023 17:21:03 +0100 Subject: [PATCH 097/118] drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e53448e0a1efa5133c7db78f1df1f4caf177676b upstream. The pid field corresponds to the result of gettid() in userspace. However, userspace cannot reliably attribute PTE events to processes with just the thread id. This patch allows userspace to easily attribute PTE update events to specific processes by comparing this field with the result of getpid(). For attributing events to specific threads, the thread id is also contained in the common fields of each trace event. Reviewed-by: Christian König Signed-off-by: Friedrich Vock Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index b5f3bba851db..01e42bdd8e4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.pid, + vm->task_info.tgid, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, From 386a8d694f95f5d9b844946f17e6376b92a8abff Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 12 Feb 2023 15:13:03 +0000 Subject: [PATCH 098/118] tracing: Fix TASK_COMM_LEN in trace event format file commit b6c7abd1c28a63ad633433d037ee15a1bc3023ba upstream. After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"), the content of the format file under /sys/kernel/tracing/events/task/task_newtask was changed from field:char comm[16]; offset:12; size:16; signed:0; to field:char comm[TASK_COMM_LEN]; offset:12; size:16; signed:0; John reported that this change breaks older versions of perfetto. Then Mathieu pointed out that this behavioral change was caused by the use of __stringify(_len), which happens to work on macros, but not on enum labels. And he also gave the suggestion on how to fix it: :One possible solution to make this more robust would be to extend :struct trace_event_fields with one more field that indicates the length :of an array as an actual integer, without storing it in its stringified :form in the type, and do the formatting in f_show where it belongs. The result as follows after this change, $ cat /sys/kernel/tracing/events/task/task_newtask/format field:char comm[16]; offset:12; size:16; signed:0; Link: https://lore.kernel.org/lkml/Y+QaZtz55LIirsUO@google.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230210155921.4610-1-laoar.shao@gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230212151303.12353-1-laoar.shao@gmail.com Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Kajetan Puchalski CC: Qais Yousef Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN") Reported-by: John Stultz Debugged-by: Mathieu Desnoyers Suggested-by: Mathieu Desnoyers Suggested-by: Steven Rostedt Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/trace_events.h | 1 + include/trace/stages/stage4_event_fields.h | 3 +- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 39 +++++++++++++++++----- kernel/trace/trace_export.c | 3 +- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 20749bd9db71..04c59f8d801f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -270,6 +270,7 @@ struct trace_event_fields { const int align; const int is_signed; const int filter_type; + const int len; }; int (*define_fields)(struct trace_event_call *); }; diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index a8fb25f39a99..fae467ccd0c3 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -26,7 +26,8 @@ #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER,\ + .len = _len }, #undef __dynamic_array #define __dynamic_array(_type, _item, _len) { \ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9e931f51328a..ac7af03ce837 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1282,6 +1282,7 @@ struct ftrace_event_field { int offset; int size; int is_signed; + int len; }; struct prog_entry; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f71ea6e79b3c..2a2ea9b6f762 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -114,7 +114,7 @@ trace_find_event_field(struct trace_event_call *call, char *name) static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, - int is_signed, int filter_type) + int is_signed, int filter_type, int len) { struct ftrace_event_field *field; @@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field->offset = offset; field->size = size; field->is_signed = is_signed; + field->len = len; list_add(&field->link, head); @@ -150,14 +151,28 @@ int trace_define_field(struct trace_event_call *call, const char *type, head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, - is_signed, filter_type); + is_signed, filter_type, 0); } EXPORT_SYMBOL_GPL(trace_define_field); +int trace_define_field_ext(struct trace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, + int filter_type, int len) +{ + struct list_head *head; + + if (WARN_ON(!call->class)) + return 0; + + head = trace_get_fields(call); + return __trace_define_field(head, type, name, offset, size, + is_signed, filter_type, len); +} + #define __generic_field(type, item, filter_type) \ ret = __trace_define_field(&ftrace_generic_fields, #type, \ #item, 0, 0, is_signed_type(type), \ - filter_type); \ + filter_type, 0); \ if (ret) \ return ret; @@ -166,7 +181,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), FILTER_OTHER, 0); \ if (ret) \ return ret; @@ -1588,12 +1603,17 @@ static int f_show(struct seq_file *m, void *v) seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); - else - seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + else if (field->len) + seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, - array_descriptor, field->offset, + field->len, field->offset, field->size, !!field->is_signed); + else + seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + field->offset, field->size, !!field->is_signed); return 0; } @@ -2379,9 +2399,10 @@ event_define_fields(struct trace_event_call *call) } offset = ALIGN(offset, field->align); - ret = trace_define_field(call, field->type, field->name, + ret = trace_define_field_ext(call, field->type, field->name, offset, field->size, - field->is_signed, field->filter_type); + field->is_signed, field->filter_type, + field->len); if (WARN_ON_ONCE(ret)) { pr_err("error code is %d\n", ret); break; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d960f6b11b5e..58f3946081e2 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -111,7 +111,8 @@ static void __always_unused ____ftrace_check_##name(void) \ #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ .size = sizeof(_type[_len]), .align = __alignof__(_type), \ - is_signed_type(_type), .filter_type = FILTER_OTHER }, + is_signed_type(_type), .filter_type = FILTER_OTHER, \ + .len = _len }, #undef __array_desc #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) From 446ac8dd8997cd71befd726ce30520e6c35d80a5 Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Thu, 2 Feb 2023 09:30:20 -0300 Subject: [PATCH 099/118] rtmutex: Ensure that the top waiter is always woken up commit db370a8b9f67ae5f17e3d5482493294467784504 upstream. Let L1 and L2 be two spinlocks. Let T1 be a task holding L1 and blocked on L2. T1, currently, is the top waiter of L2. Let T2 be the task holding L2. Let T3 be a task trying to acquire L1. The following events will lead to a state in which the wait queue of L2 isn't empty, but no task actually holds the lock. T1 T2 T3 == == == spin_lock(L1) | raw_spin_lock(L1->wait_lock) | rtlock_slowlock_locked(L1) | | task_blocks_on_rt_mutex(L1, T3) | | | orig_waiter->lock = L1 | | | orig_waiter->task = T3 | | | raw_spin_unlock(L1->wait_lock) | | | rt_mutex_adjust_prio_chain(T1, L1, L2, orig_waiter, T3) spin_unlock(L2) | | | | | rt_mutex_slowunlock(L2) | | | | | | raw_spin_lock(L2->wait_lock) | | | | | | wakeup(T1) | | | | | | raw_spin_unlock(L2->wait_lock) | | | | | | | | waiter = T1->pi_blocked_on | | | | waiter == rt_mutex_top_waiter(L2) | | | | waiter->task == T1 | | | | raw_spin_lock(L2->wait_lock) | | | | dequeue(L2, waiter) | | | | update_prio(waiter, T1) | | | | enqueue(L2, waiter) | | | | waiter != rt_mutex_top_waiter(L2) | | | | L2->owner == NULL | | | | wakeup(T1) | | | | raw_spin_unlock(L2->wait_lock) T1 wakes up T1 != top_waiter(L2) schedule_rtlock() If the deadline of T1 is updated before the call to update_prio(), and the new deadline is greater than the deadline of the second top waiter, then after the requeue, T1 is no longer the top waiter, and the wrong task is woken up which will then go back to sleep because it is not the top waiter. This can be reproduced in PREEMPT_RT with stress-ng: while true; do stress-ng --sched deadline --sched-period 1000000000 \ --sched-runtime 800000000 --sched-deadline \ 1000000000 --mmapfork 23 -t 20 done A similar issue was pointed out by Thomas versus the cases where the top waiter drops out early due to a signal or timeout, which is a general issue for all regular rtmutex use cases, e.g. futex. The problematic code is in rt_mutex_adjust_prio_chain(): // Save the top waiter before dequeue/enqueue prerequeue_top_waiter = rt_mutex_top_waiter(lock); rt_mutex_dequeue(lock, waiter); waiter_update_prio(waiter, task); rt_mutex_enqueue(lock, waiter); // Lock has no owner? if (!rt_mutex_owner(lock)) { // Top waiter changed ----> if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) ----> wake_up_state(waiter->task, waiter->wake_state); This only takes the case into account where @waiter is the new top waiter due to the requeue operation. But it fails to handle the case where @waiter is not longer the top waiter due to the requeue operation. Ensure that the new top waiter is woken up so in all cases so it can take over the ownerless lock. [ tglx: Amend changelog, add Fixes tag ] Fixes: c014ef69b3ac ("locking/rtmutex: Add wake_state to rt_mutex_waiter") Signed-off-by: Wander Lairson Costa Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230117172649.52465-1-wander@redhat.com Link: https://lore.kernel.org/r/20230202123020.14844-1-wander@redhat.com Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rtmutex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 010cf4e6d0b8..728f434de2bb 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -901,8 +901,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, * then we need to wake the new top waiter up to try * to get the lock. */ - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) - wake_up_state(waiter->task, waiter->wake_state); + top_waiter = rt_mutex_top_waiter(lock); + if (prerequeue_top_waiter != top_waiter) + wake_up_state(top_waiter->task, top_waiter->wake_state); raw_spin_unlock_irq(&lock->wait_lock); return 0; } From a01ad536becb5d4c001a7d50dc1ca9fa14ef75a8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 9 Feb 2023 21:11:47 +0100 Subject: [PATCH 100/118] arm64: dts: meson-gx: Make mmc host controller interrupts level-sensitive commit 66e45351f7d6798751f98001d1fcd572024d87f0 upstream. The usage of edge-triggered interrupts lead to lost interrupts under load, see [0]. This was confirmed to be fixed by using level-triggered interrupts. The report was about SDIO. However, as the host controller is the same for SD and MMC, apply the change to all mmc controller instances. [0] https://www.spinics.net/lists/linux-mmc/msg73991.html Fixes: ef8d2ffedf18 ("ARM64: dts: meson-gxbb: add MMC support") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/76e042e0-a610-5ed5-209f-c4d7f879df44@gmail.com Signed-off-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 023a52005494..fa6cff4a2ebc 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -602,21 +602,21 @@ sd_emmc_a: mmc@70000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; reg = <0x0 0x70000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; }; sd_emmc_b: mmc@72000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; reg = <0x0 0x72000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; }; sd_emmc_c: mmc@74000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; reg = <0x0 0x74000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; }; }; From 222b1070b0fa12be25f9503a503724a899c6326b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 9 Feb 2023 21:11:10 +0100 Subject: [PATCH 101/118] arm64: dts: meson-g12-common: Make mmc host controller interrupts level-sensitive commit ac8db4cceed218cca21c84f9d75ce88182d8b04f upstream. The usage of edge-triggered interrupts lead to lost interrupts under load, see [0]. This was confirmed to be fixed by using level-triggered interrupts. The report was about SDIO. However, as the host controller is the same for SD and MMC, apply the change to all mmc controller instances. [0] https://www.spinics.net/lists/linux-mmc/msg73991.html Fixes: 4759fd87b928 ("arm64: dts: meson: g12a: add mmc nodes") Tested-by: FUKAUMI Naoki Tested-by: Martin Blumenstingl Tested-by: Jerome Brunet Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/27d89baa-b8fa-baca-541b-ef17a97cde3c@gmail.com Signed-off-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 45947c1031c4..894cea697550 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2318,7 +2318,7 @@ sd_emmc_a: sd@ffe03000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0xffe03000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_A>, <&clkc CLKID_SD_EMMC_A_CLK0>, @@ -2330,7 +2330,7 @@ sd_emmc_b: sd@ffe05000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0xffe05000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_B>, <&clkc CLKID_SD_EMMC_B_CLK0>, @@ -2342,7 +2342,7 @@ sd_emmc_c: mmc@ffe07000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0xffe07000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_C>, <&clkc CLKID_SD_EMMC_C_CLK0>, From 274d9a28527d64a95107a1e62c71632065f0924f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 9 Feb 2023 21:10:31 +0100 Subject: [PATCH 102/118] arm64: dts: meson-axg: Make mmc host controller interrupts level-sensitive commit d182bcf300772d8b2e5f43e47fa0ebda2b767cc4 upstream. The usage of edge-triggered interrupts lead to lost interrupts under load, see [0]. This was confirmed to be fixed by using level-triggered interrupts. The report was about SDIO. However, as the host controller is the same for SD and MMC, apply the change to all mmc controller instances. [0] https://www.spinics.net/lists/linux-mmc/msg73991.html Fixes: 221cf34bac54 ("ARM64: dts: meson-axg: enable the eMMC controller") Reported-by: Peter Suti Tested-by: Vyacheslav Bocharov Tested-by: Peter Suti Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/c00655d3-02f8-6f5f-4239-ca2412420cad@gmail.com Signed-off-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index 04f797b5a012..73cd1791a13f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -1885,7 +1885,7 @@ sd_emmc_b: sd@5000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0x5000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_B>, <&clkc CLKID_SD_EMMC_B_CLK0>, @@ -1897,7 +1897,7 @@ sd_emmc_c: mmc@7000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0x7000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_C>, <&clkc CLKID_SD_EMMC_C_CLK0>, From 3b4c045a98f53a8890a94bb5846a390c8e39e673 Mon Sep 17 00:00:00 2001 From: David Chen Date: Thu, 9 Feb 2023 17:48:28 +0000 Subject: [PATCH 103/118] Fix page corruption caused by racy check in __free_pages commit 462a8e08e0e6287e5ce13187257edbf24213ed03 upstream. When we upgraded our kernel, we started seeing some page corruption like the following consistently: BUG: Bad page state in process ganesha.nfsd pfn:1304ca page:0000000022261c55 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x1304ca flags: 0x17ffffc0000000() raw: 0017ffffc0000000 ffff8a513ffd4c98 ffffeee24b35ec08 0000000000000000 raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000 page dumped because: nonzero mapcount CPU: 0 PID: 15567 Comm: ganesha.nfsd Kdump: loaded Tainted: P B O 5.10.158-1.nutanix.20221209.el7.x86_64 #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016 Call Trace: dump_stack+0x74/0x96 bad_page.cold+0x63/0x94 check_new_page_bad+0x6d/0x80 rmqueue+0x46e/0x970 get_page_from_freelist+0xcb/0x3f0 ? _cond_resched+0x19/0x40 __alloc_pages_nodemask+0x164/0x300 alloc_pages_current+0x87/0xf0 skb_page_frag_refill+0x84/0x110 ... Sometimes, it would also show up as corruption in the free list pointer and cause crashes. After bisecting the issue, we found the issue started from commit e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages"): if (put_page_testzero(page)) free_the_page(page, order); else if (!PageHead(page)) while (order-- > 0) free_the_page(page + (1 << order), order); So the problem is the check PageHead is racy because at this point we already dropped our reference to the page. So even if we came in with compound page, the page can already be freed and PageHead can return false and we will end up freeing all the tail pages causing double free. Fixes: e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages") Link: https://lore.kernel.org/lkml/BYAPR02MB448855960A9656EEA81141FC94D99@BYAPR02MB4488.namprd02.prod.outlook.com/ Cc: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Chunwei Chen Reviewed-by: Vlastimil Babka Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e60657875d3..b2877a84ed19 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5640,9 +5640,12 @@ EXPORT_SYMBOL(get_zeroed_page); */ void __free_pages(struct page *page, unsigned int order) { + /* get PageHead before we drop reference */ + int head = PageHead(page); + if (put_page_testzero(page)) free_the_page(page, order); - else if (!PageHead(page)) + else if (!head) while (order-- > 0) free_the_page(page + (1 << order), order); } From 346631d52a42ca29c6673637c7d0cb493df72748 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 8 Feb 2023 16:28:21 -0800 Subject: [PATCH 104/118] arm64: efi: Force the use of SetVirtualAddressMap() on eMAG and Altra Max machines commit 190233164cd77115f8dea718cbac561f557092c6 upstream. Commit 550b33cfd445 ("arm64: efi: Force the use of SetVirtualAddressMap() on Altra machines") identifies the Altra family via the family field in the type#1 SMBIOS record. eMAG and Altra Max machines are similarly affected but not detected with the strict strcmp test. The type1_family smbios string is not an entirely reliable means of identifying systems with this issue as OEMs can, and do, use their own strings for these fields. However, until we have a better solution, capture the bulk of these systems by adding strcmp matching for "eMAG" and "Altra Max". Fixes: 550b33cfd445 ("arm64: efi: Force the use of SetVirtualAddressMap() on Altra machines") Cc: # 6.1.x Cc: Alexandru Elisei Signed-off-by: Darren Hart Tested-by: Justin He Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/arm64-stub.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index f9de5217ea65..42282c5c3fe6 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -20,10 +20,13 @@ static bool system_needs_vamap(void) const u8 *type1_family = efi_get_smbios_string(1, family); /* - * Ampere Altra machines crash in SetTime() if SetVirtualAddressMap() - * has not been called prior. + * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if + * SetVirtualAddressMap() has not been called prior. */ - if (!type1_family || strcmp(type1_family, "Altra")) + if (!type1_family || ( + strcmp(type1_family, "eMAG") && + strcmp(type1_family, "Altra") && + strcmp(type1_family, "Altra Max"))) return false; efi_warn("Working around broken SetVirtualAddressMap()\n"); From daaa0760cd0b6cf5985384d24e991f6c23a236e7 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 31 Jan 2023 10:40:09 +0800 Subject: [PATCH 105/118] drm/amd/pm: bump SMU 13.0.0 driver_if header version commit 9874cc2df4e892c8744aa0472866cbf7c3cf1862 upstream. This can suppress the warning caused by version mismatch. Signed-off-by: Evan Quan Acked-by: Alex Deucher Acked-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index d6b964cf73bd..4bc7aee4d44f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -123,7 +123,8 @@ (1 << FEATURE_DS_FCLK_BIT) | \ (1 << FEATURE_DS_LCLK_BIT) | \ (1 << FEATURE_DS_DCFCLK_BIT) | \ - (1 << FEATURE_DS_UCLK_BIT)) + (1 << FEATURE_DS_UCLK_BIT) | \ + (1ULL << FEATURE_DS_VCN_BIT)) //For use with feature control messages typedef enum { @@ -522,9 +523,9 @@ typedef enum { TEMP_HOTSPOT_M, TEMP_MEM, TEMP_VR_GFX, - TEMP_VR_SOC, TEMP_VR_MEM0, TEMP_VR_MEM1, + TEMP_VR_SOC, TEMP_VR_U, TEMP_LIQUID0, TEMP_LIQUID1, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index e8c6febb8b64..d9c4821bcfc8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,7 +28,7 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 From e379d5662c26b65384d95674559f9343be132851 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Mon, 6 Feb 2023 12:21:42 -0500 Subject: [PATCH 106/118] drm/amdgpu: Add unique_id support for GC 11.0.1/2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c108a18462949fe709ebd6b0be68398d643bc285 upstream. These can support unique_id, so create the sysfs file for them Signed-off-by: Kent Russell Reviewed-by: Harish Kasiviswanathan Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 236657eece47..41635694e521 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1991,6 +1991,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 2): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): *states = ATTR_STATE_SUPPORTED; break; default: From d82e6903b395aafc5c94b7282eccdd13903942a3 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 7 Feb 2023 10:42:31 +0800 Subject: [PATCH 107/118] drm/amd/pm: bump SMU 13.0.7 driver_if header version commit dc38b996db968f51f0fe45845a519c5cd7f6bd04 upstream. This can suppress the warning caused by version mismatch. Signed-off-by: Evan Quan Acked-by: Alex Deucher Acked-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 29 ++++++++++--------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index d6b13933a98f..48a3a3952ceb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -113,20 +113,21 @@ #define NUM_FEATURES 64 #define ALLOWED_FEATURE_CTRL_DEFAULT 0xFFFFFFFFFFFFFFFFULL -#define ALLOWED_FEATURE_CTRL_SCPM (1 << FEATURE_DPM_GFXCLK_BIT) | \ - (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \ - (1 << FEATURE_DPM_UCLK_BIT) | \ - (1 << FEATURE_DPM_FCLK_BIT) | \ - (1 << FEATURE_DPM_SOCCLK_BIT) | \ - (1 << FEATURE_DPM_MP0CLK_BIT) | \ - (1 << FEATURE_DPM_LINK_BIT) | \ - (1 << FEATURE_DPM_DCN_BIT) | \ - (1 << FEATURE_DS_GFXCLK_BIT) | \ - (1 << FEATURE_DS_SOCCLK_BIT) | \ - (1 << FEATURE_DS_FCLK_BIT) | \ - (1 << FEATURE_DS_LCLK_BIT) | \ - (1 << FEATURE_DS_DCFCLK_BIT) | \ - (1 << FEATURE_DS_UCLK_BIT) +#define ALLOWED_FEATURE_CTRL_SCPM ((1 << FEATURE_DPM_GFXCLK_BIT) | \ + (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \ + (1 << FEATURE_DPM_UCLK_BIT) | \ + (1 << FEATURE_DPM_FCLK_BIT) | \ + (1 << FEATURE_DPM_SOCCLK_BIT) | \ + (1 << FEATURE_DPM_MP0CLK_BIT) | \ + (1 << FEATURE_DPM_LINK_BIT) | \ + (1 << FEATURE_DPM_DCN_BIT) | \ + (1 << FEATURE_DS_GFXCLK_BIT) | \ + (1 << FEATURE_DS_SOCCLK_BIT) | \ + (1 << FEATURE_DS_FCLK_BIT) | \ + (1 << FEATURE_DS_LCLK_BIT) | \ + (1 << FEATURE_DS_DCFCLK_BIT) | \ + (1 << FEATURE_DS_UCLK_BIT) | \ + (1ULL << FEATURE_DS_VCN_BIT)) //For use with feature control messages typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index d9c4821bcfc8..992163e66f7b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -32,7 +32,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From 2bcbbef9cace772f5b7128b11401c515982de34b Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 2 Feb 2023 10:48:56 -0300 Subject: [PATCH 108/118] drm/amdgpu/fence: Fix oops due to non-matching drm_sched init/fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5ad7bbf3dba5c4a684338df1f285080f2588b535 upstream. Currently amdgpu calls drm_sched_fini() from the fence driver sw fini routine - such function is expected to be called only after the respective init function - drm_sched_init() - was executed successfully. Happens that we faced a driver probe failure in the Steam Deck recently, and the function drm_sched_fini() was called even without its counter-part had been previously called, causing the following oops: amdgpu: probe of 0000:04:00.0 failed with error -110 BUG: kernel NULL pointer dereference, address: 0000000000000090 PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 609 Comm: systemd-udevd Not tainted 6.2.0-rc3-gpiccoli #338 Hardware name: Valve Jupiter/Jupiter, BIOS F7A0113 11/04/2022 RIP: 0010:drm_sched_fini+0x84/0xa0 [gpu_sched] [...] Call Trace: amdgpu_fence_driver_sw_fini+0xc8/0xd0 [amdgpu] amdgpu_device_fini_sw+0x2b/0x3b0 [amdgpu] amdgpu_driver_release_kms+0x16/0x30 [amdgpu] devm_drm_dev_init_release+0x49/0x70 [...] To prevent that, check if the drm_sched was properly initialized for a given ring before calling its fini counter-part. Notice ideally we'd use sched.ready for that; such field is set as the latest thing on drm_sched_init(). But amdgpu seems to "override" the meaning of such field - in the above oops for example, it was a GFX ring causing the crash, and the sched.ready field was set to true in the ring init routine, regardless of the state of the DRM scheduler. Hence, we ended-up using sched.ops as per Christian's suggestion [0], and also removed the no_scheduler check [1]. [0] https://lore.kernel.org/amd-gfx/984ee981-2906-0eaf-ccec-9f80975cb136@amd.com/ [1] https://lore.kernel.org/amd-gfx/cd0e2994-f85f-d837-609f-7056d5fb7231@amd.com/ Fixes: 067f44c8b459 ("drm/amdgpu: avoid over-handle of fence driver fini in s3 test (v2)") Suggested-by: Christian König Cc: Guchun Chen Cc: Luben Tuikov Cc: Mario Limonciello Reviewed-by: Luben Tuikov Signed-off-by: Guilherme G. Piccoli Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d0d99ed607dd..6fdb679321d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -564,7 +564,13 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) + /* + * Notice we check for sched.ops since there's some + * override on the meaning of sched.ready by amdgpu. + * The natural check would be sched.ready, which is + * set as drm_sched_init() finishes... + */ + if (ring->sched.ops) drm_sched_fini(&ring->sched); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) From 4609e1773222670af42e71c3a7bbf0453438d74a Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Fri, 13 Jan 2023 18:53:45 +0800 Subject: [PATCH 109/118] drm/amdgpu/smu: skip pptable init under sriov commit c6ac406cd8ff610a2d5da298b1d3071acfcde7f0 upstream. sriov does not need to init pptable from amdgpu driver we finish it from PF Signed-off-by: Jane Jian Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index cf96c3f2affe..508e392547d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -407,6 +407,9 @@ static int smu_v13_0_0_setup_pptable(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + ret = smu_v13_0_0_get_pptable_from_pmfw(smu, &smu_table->power_play_table, &smu_table->power_play_table_size); @@ -1257,6 +1260,9 @@ static int smu_v13_0_0_get_thermal_temperature_range(struct smu_context *smu, table_context->power_play_table; PPTable_t *pptable = smu->smu_table.driver_pptable; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + if (!range) return -EINVAL; From 8d81e1c6865e7114d9829b0f3c73a66cd3a0963d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 31 Jan 2023 09:56:46 -0500 Subject: [PATCH 110/118] drm/amd/display: properly handling AGP aperture in vm setup commit 5c4e8c71d1202cd84d870e7e5cb8d6b52f9c3507 upstream. Take into account whether or not the AGP aperture is enabled or not when calculating the system aperture. Fixes white screens with DCN 3.1.4. Based on a patch from Yifan Zhang Cc: Yifan Zhang Acked-by: Harry Wentland Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b425ec00817c..988b1c947aef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1193,24 +1193,38 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ memset(pa_config, 0, sizeof(*pa_config)); - logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; - pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - - if (adev->apu_flags & AMD_APU_IS_RAVEN2) - /* - * Raven2 has a HW issue that it is unable to use the vram which - * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the - * workaround that increase system aperture high address (add 1) - * to get rid of the VM fault and hardware hang. - */ - logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18); - else - logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18; - agp_base = 0; agp_bot = adev->gmc.agp_start >> 24; agp_top = adev->gmc.agp_end >> 24; + /* AGP aperture is disabled */ + if (agp_bot == agp_top) { + logical_addr_low = adev->gmc.vram_start >> 18; + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + /* + * Raven2 has a HW issue that it is unable to use the vram which + * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the + * workaround that increase system aperture high address (add 1) + * to get rid of the VM fault and hardware hang. + */ + logical_addr_high = (adev->gmc.fb_end >> 18) + 0x1; + else + logical_addr_high = adev->gmc.vram_end >> 18; + } else { + logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + /* + * Raven2 has a HW issue that it is unable to use the vram which + * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the + * workaround that increase system aperture high address (add 1) + * to get rid of the VM fault and hardware hang. + */ + logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18); + else + logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18; + } + + pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF; page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12); From 5af27a53a881ed49e071875c10760b0fc13996a1 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 31 Jan 2023 15:05:46 -0100 Subject: [PATCH 111/118] drm/amd/display: fix cursor offset on rotation 180 commit 49d0555976f0972af68397ed996375c135b38ba7 upstream. Cursor gets clipped off in the middle of the screen with hw rotation 180. Fix a miscalculation of cursor offset when it's placed near the edges in the pipe split case. Cursor bugs with hw rotation were reported on AMD issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 The issues on rotation 270 was fixed by: https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-Brian.Chang@amd.com/ that partially addressed the rotation 180 too. So, this patch is the final bits for rotation 180. Reported-by: Xaver Hugl Reviewed-by: Harry Wentland Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Signed-off-by: Melissa Wen Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c06538c37a11..55d63d860ef1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3612,7 +3612,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { From bfa700d12274bd3509b64fd545d1046eef89d564 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 Feb 2023 08:49:20 -0800 Subject: [PATCH 112/118] drm/i915: Move fd_install after last use of fence commit 251e8c5b1b1fadcc387a8e618c7437d330bdac3e upstream. Because eb_composite_fence_create() drops the fence_array reference after creation of the sync_file, only the sync_file holds a ref to the fence. But fd_install() makes that reference visable to userspace, so it must be the last thing we do with the fence. Signed-off-by: Rob Clark Fixes: 00dae4d3d35d ("drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)") Cc: # v5.15+ [tursulin: Added stable tag.] Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230203164937.4035503-1-robdclark@gmail.com (cherry picked from commit 960dafa30455450d318756a9896a02727f2639e0) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f461e34cc5f0..0a123bb44c9f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3478,6 +3478,13 @@ err_request: eb.composite_fence : &eb.requests[0]->fence); + if (unlikely(eb.gem_context->syncobj)) { + drm_syncobj_replace_fence(eb.gem_context->syncobj, + eb.composite_fence ? + eb.composite_fence : + &eb.requests[0]->fence); + } + if (out_fence) { if (err == 0) { fd_install(out_fence_fd, out_fence->file); @@ -3489,13 +3496,6 @@ err_request: } } - if (unlikely(eb.gem_context->syncobj)) { - drm_syncobj_replace_fence(eb.gem_context->syncobj, - eb.composite_fence ? - eb.composite_fence : - &eb.requests[0]->fence); - } - if (!out_fence && eb.composite_fence) dma_fence_put(eb.composite_fence); From baaed8c92927e2530a799daf2e909e6688dc2943 Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Fri, 3 Feb 2023 19:22:05 +0530 Subject: [PATCH 113/118] drm/i915: Initialize the obj flags for shmem objects commit 44e4c5684fcc82d8f099656c4ea39d9571e2a8ac upstream. Obj flags for shmem objects is not being set correctly. Fixes in setting BO_ALLOC_USER flag which applies to shmem objs as well. v2: Add fixes tag (Tvrtko, Matt A) Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on acquire") Cc: # v5.15+ Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Signed-off-by: Aravind Iddamsetty Reviewed-by: Andrzej Hajda Signed-off-by: Tvrtko Ursulin [tursulin: Grouped all tags together.] Link: https://patchwork.freedesktop.org/patch/msgid/20230203135205.4051149-1-aravind.iddamsetty@intel.com (cherry picked from commit bca0d1d3ceeb07be45a51c0fa4d57a0ce31b6aed) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 2f7804492cd5..b7eae3aee166 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -579,7 +579,7 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; From 7fa83855852e4b13dd05702569f3f0bb3e1c624d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2023 08:43:35 +0200 Subject: [PATCH 114/118] drm/i915: Fix VBT DSI DVO port handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6a7ff131f17f44c593173c5ee30e2c03ef211685 upstream. Turns out modern (icl+) VBTs still declare their DSI ports as MIPI-A and MIPI-C despite the PHYs now being A and B. Remap appropriately to allow the panels declared as MIPI-C to work. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8016 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230207064337.18697-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 118b5c136c04da705b274b0d39982bb8b7430fc5) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_bios.c | 33 ++++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index edbdb949b6ce..178a8cbb7583 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2466,6 +2466,22 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, dvo_port); } +static enum port +dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) +{ + switch (dvo_port) { + case DVO_PORT_MIPIA: + return PORT_A; + case DVO_PORT_MIPIC: + if (DISPLAY_VER(i915) >= 11) + return PORT_B; + else + return PORT_C; + default: + return PORT_NONE; + } +} + static int parse_bdb_230_dp_max_link_rate(const int vbt_max_link_rate) { switch (vbt_max_link_rate) { @@ -3406,19 +3422,16 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *i915, dvo_port = child->dvo_port; - if (dvo_port == DVO_PORT_MIPIA || - (dvo_port == DVO_PORT_MIPIB && DISPLAY_VER(i915) >= 11) || - (dvo_port == DVO_PORT_MIPIC && DISPLAY_VER(i915) < 11)) { - if (port) - *port = dvo_port - DVO_PORT_MIPIA; - return true; - } else if (dvo_port == DVO_PORT_MIPIB || - dvo_port == DVO_PORT_MIPIC || - dvo_port == DVO_PORT_MIPID) { + if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) { drm_dbg_kms(&i915->drm, "VBT has unsupported DSI port %c\n", port_name(dvo_port - DVO_PORT_MIPIA)); + continue; } + + if (port) + *port = dsi_dvo_port_to_port(i915, dvo_port); + return true; } return false; @@ -3503,7 +3516,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) { + if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) { if (!devdata->dsc) return false; From cc95b5d240b631e42e2863e1dcb6ad83920cc449 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Feb 2023 09:22:24 -0600 Subject: [PATCH 115/118] x86/speculation: Identify processors vulnerable to SMT RSB predictions commit be8de49bea505e7777a69ef63d60e02ac1712683 upstream. Certain AMD processors are vulnerable to a cross-thread return address predictions bug. When running in SMT mode and one of the sibling threads transitions out of C0 state, the other sibling thread could use return target predictions from the sibling thread that transitioned out of C0. The Spectre v2 mitigations cover the Linux kernel, as it fills the RSB when context switching to the idle thread. However, KVM allows a VMM to prevent exiting guest mode when transitioning out of C0. A guest could act maliciously in this situation, so create a new x86 BUG that can be used to detect if the processor is vulnerable. Reviewed-by: Borislav Petkov (AMD) Signed-off-by: Tom Lendacky Message-Id: <91cec885656ca1fcd4f0185ce403a53dd9edecb7.1675956146.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b2da7cb64b31..92729c38853d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -463,5 +463,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e508f239098..e80572b674b7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1235,6 +1235,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define MMIO_SBDS BIT(2) /* CPU is affected by RETbleed, speculating where you would not expect it */ #define RETBLEED BIT(3) +/* CPU is affected by SMT (cross-thread) return predictions */ +#define SMT_RSB BIT(4) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1266,8 +1268,8 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED), - VULNBL_HYGON(0x18, RETBLEED), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), {} }; @@ -1385,6 +1387,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !(ia32_cap & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) + setup_force_cpu_bug(X86_BUG_SMT_RSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From 40c4fdfc942e0c93054884546bf785fe24c6831e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Feb 2023 09:22:25 -0600 Subject: [PATCH 116/118] KVM: x86: Mitigate the cross-thread return address predictions bug commit 6f0f2d5ef895d66a3f2b32dd05189ec34afa5a55 upstream. By default, KVM/SVM will intercept attempts by the guest to transition out of C0. However, the KVM_CAP_X86_DISABLE_EXITS capability can be used by a VMM to change this behavior. To mitigate the cross-thread return address predictions bug (X86_BUG_SMT_RSB), a VMM must not be allowed to override the default behavior to intercept C0 transitions. Use a module parameter to control the mitigation on processors that are vulnerable to X86_BUG_SMT_RSB. If the processor is vulnerable to the X86_BUG_SMT_RSB bug and the module parameter is set to mitigate the bug, KVM will not allow the disabling of the HLT, MWAIT and CSTATE exits. Signed-off-by: Tom Lendacky Message-Id: <4019348b5e07148eb4d593380a5f6713b93c9a16.1675956146.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 69227f77b201..a0c35b948c30 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -192,6 +192,10 @@ module_param(enable_pmu, bool, 0444); bool __read_mostly eager_page_split = true; module_param(eager_page_split, bool, 0644); +/* Enable/disable SMT_RSB bug mitigation */ +bool __read_mostly mitigate_smt_rsb; +module_param(mitigate_smt_rsb, bool, 0444); + /* * Restoring the host value for MSRs that are only consumed when running in * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU @@ -4435,10 +4439,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_VALID_FLAGS; break; case KVM_CAP_X86_DISABLE_EXITS: - r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | - KVM_X86_DISABLE_EXITS_CSTATE; - if(kvm_can_mwait_in_guest()) - r |= KVM_X86_DISABLE_EXITS_MWAIT; + r = KVM_X86_DISABLE_EXITS_PAUSE; + + if (!mitigate_smt_rsb) { + r |= KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_CSTATE; + + if (kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; + } break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, @@ -6214,15 +6223,26 @@ split_irqchip_unlock: if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) break; - if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && - kvm_can_mwait_in_guest()) - kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) - kvm->arch.hlt_in_guest = true; if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) - kvm->arch.cstate_in_guest = true; + +#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ + "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." + + if (!mitigate_smt_rsb) { + if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && + (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) + pr_warn_once(SMT_RSB_MSG); + + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && + kvm_can_mwait_in_guest()) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; + } + r = 0; break; case KVM_CAP_MSR_PLATFORM_INFO: @@ -13730,6 +13750,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); static int __init kvm_x86_init(void) { kvm_mmu_x86_module_init(); + mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); return 0; } module_init(kvm_x86_init); From da1ae884562cc22e2705113cc39712477e37ab4e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Feb 2023 09:22:26 -0600 Subject: [PATCH 117/118] Documentation/hw-vuln: Add documentation for Cross-Thread Return Predictions commit 493a2c2d23ca91afba96ac32b6cbafb54382c2a3 upstream. Add the admin guide for the Cross-Thread Return Predictions vulnerability. Signed-off-by: Tom Lendacky Message-Id: <60f9c0b4396956ce70499ae180cb548720b25c7e.1675956146.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/hw-vuln/cross-thread-rsb.rst | 92 +++++++++++++++++++ Documentation/admin-guide/hw-vuln/index.rst | 1 + 2 files changed, 93 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst new file mode 100644 index 000000000000..ec6e9f5bcf9e --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst @@ -0,0 +1,92 @@ + +.. SPDX-License-Identifier: GPL-2.0 + +Cross-Thread Return Address Predictions +======================================= + +Certain AMD and Hygon processors are subject to a cross-thread return address +predictions vulnerability. When running in SMT mode and one sibling thread +transitions out of C0 state, the other sibling thread could use return target +predictions from the sibling thread that transitioned out of C0. + +The Spectre v2 mitigations protect the Linux kernel, as it fills the return +address prediction entries with safe targets when context switching to the idle +thread. However, KVM does allow a VMM to prevent exiting guest mode when +transitioning out of C0. This could result in a guest-controlled return target +being consumed by the sibling thread. + +Affected processors +------------------- + +The following CPUs are vulnerable: + + - AMD Family 17h processors + - Hygon Family 18h processors + +Related CVEs +------------ + +The following CVE entry is related to this issue: + + ============== ======================================= + CVE-2022-27672 Cross-Thread Return Address Predictions + ============== ======================================= + +Problem +------- + +Affected SMT-capable processors support 1T and 2T modes of execution when SMT +is enabled. In 2T mode, both threads in a core are executing code. For the +processor core to enter 1T mode, it is required that one of the threads +requests to transition out of the C0 state. This can be communicated with the +HLT instruction or with an MWAIT instruction that requests non-C0. +When the thread re-enters the C0 state, the processor transitions back +to 2T mode, assuming the other thread is also still in C0 state. + +In affected processors, the return address predictor (RAP) is partitioned +depending on the SMT mode. For instance, in 2T mode each thread uses a private +16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon +transition between 1T/2T mode, the RAP contents are not modified but the RAP +pointers (which control the next return target to use for predictions) may +change. This behavior may result in return targets from one SMT thread being +used by RET predictions in the sibling thread following a 1T/2T switch. In +particular, a RET instruction executed immediately after a transition to 1T may +use a return target from the thread that just became idle. In theory, this +could lead to information disclosure if the return targets used do not come +from trustworthy code. + +Attack scenarios +---------------- + +An attack can be mounted on affected processors by performing a series of CALL +instructions with targeted return locations and then transitioning out of C0 +state. + +Mitigation mechanism +-------------------- + +Before entering idle state, the kernel context switches to the idle thread. The +context switch fills the RAP entries (referred to as the RSB in Linux) with safe +targets by performing a sequence of CALL instructions. + +Prevent a guest VM from directly putting the processor into an idle state by +intercepting HLT and MWAIT instructions. + +Both mitigations are required to fully address this issue. + +Mitigation control on the kernel command line +--------------------------------------------- + +Use existing Spectre v2 mitigations that will fill the RSB on context switch. + +Mitigation control for KVM - module parameter +--------------------------------------------- + +By default, the KVM hypervisor mitigates this issue by intercepting guest +attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS +capability to override those interceptions, but since this is not common, the +mitigation that covers this path is not enabled by default. + +The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on +using the boolean module parameter mitigate_smt_rsb, e.g.: + kvm.mitigate_smt_rsb=1 diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 4df436e7c417..e0614760a99e 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -18,3 +18,4 @@ are configurable at compile, boot or run time. core-scheduling.rst l1d_flush.rst processor_mmio_stale_data.rst + cross-thread-rsb.rst From 129c15b606278f9254a16013f7e5a94a128d9bcd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Feb 2023 19:11:56 +0100 Subject: [PATCH 118/118] Linux 6.1.12 Link: https://lore.kernel.org/r/20230213144742.219399167@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: Conor Dooley Tested-by: Allen Pais Tested-by: Justin M. Forbes Tested-by: Shuah Khan Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Salvatore Bonaccorso Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e039f2af1772..23390805e521 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 11 +SUBLEVEL = 12 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth