From fd1aeef3e9c7d9c469d28457d762f8f373f6393a Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Thu, 24 Apr 2025 16:00:28 +0200 Subject: [PATCH 001/129] Revert "rndis_host: Flag RNDIS modems as WWAN devices" commit 765f253e28909f161b0211f85cf0431cfee7d6df upstream. This reverts commit 67d1a8956d2d62fe6b4c13ebabb57806098511d8. Since this commit has been proven to be problematic for the setup of USB-tethered ethernet connections and the related breakage is very noticeable for users it should be reverted until a fixed version of the change can be rolled out. Closes: https://lore.kernel.org/all/e0df2d85-1296-4317-b717-bd757e3ab928@heusel.eu/ Link: https://chaos.social/@gromit/114377862699921553 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220002 Link: https://bugs.gentoo.org/953555 Link: https://bbs.archlinux.org/viewtopic.php?id=304892 Cc: stable@vger.kernel.org Acked-by: Lubomir Rintel Signed-off-by: Christian Heusel Link: https://patch.msgid.link/20250424-usb-tethering-fix-v1-1-b65cf97c740e@heusel.eu Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/rndis_host.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bb0bf1415872..7b3739b29c8f 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,16 +630,6 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; -static const struct driver_info wwan_rndis_info = { - .description = "Mobile Broadband RNDIS device", - .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, - .bind = rndis_bind, - .unbind = rndis_unbind, - .status = rndis_status, - .rx_fixup = rndis_rx_fixup, - .tx_fixup = rndis_tx_fixup, -}; - /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -676,11 +666,9 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and - * Telit FN990A (RNDIS) - */ + /* Novatel Verizon USB730L */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long)&wwan_rndis_info, + .driver_info = (unsigned long) &rndis_info, }, { }, // END }; From f73ecbfa78ce650f24424c9dea46747eb0809e16 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Thu, 17 Apr 2025 04:19:23 +0930 Subject: [PATCH 002/129] ALSA: usb-audio: Add retry on -EPROTO from usb_set_interface() commit f406005e162b660dc405b4f18bf7bcb93a515608 upstream. During initialisation of Focusrite USB audio interfaces, -EPROTO is sometimes returned from usb_set_interface(), which sometimes prevents the device from working: subsequent usb_set_interface() and uac_clock_source_is_valid() calls fail. This patch adds up to 5 retries in endpoint_set_interface(), with a delay starting at 5ms and doubling each time. 5 retries was chosen to allow for longer than expected waits for the interface to start responding correctly; in testing, a single 5ms delay was sufficient to fix the issue. Closes: https://github.com/geoffreybennett/fcp-support/issues/2 Cc: stable@vger.kernel.org Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/Z//7s9dKsmVxHzY2@m.b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 68aa174be12d..f71f6ff3e2b0 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -926,14 +926,21 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, { int altset = set ? ep->altsetting : 0; int err; + int retries = 0; + const int max_retries = 5; if (ep->iface_ref->altset == altset) return 0; usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); +retry: err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { + if (err == -EPROTO && ++retries <= max_retries) { + msleep(5 * (1 << (retries - 1))); + goto retry; + } usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); From 3ac18e0a94c19788c037c6b0c313ff9171931882 Mon Sep 17 00:00:00 2001 From: Joachim Priesner Date: Mon, 28 Apr 2025 07:36:06 +0200 Subject: [PATCH 003/129] ALSA: usb-audio: Add second USB ID for Jabra Evolve 65 headset commit 1149719442d28c96dc63cad432b5a6db7c300e1a upstream. There seem to be multiple USB device IDs used for these; the one I have reports as 0b0e:030c when powered on. (When powered off, it reports as 0b0e:0311.) Signed-off-by: Joachim Priesner Cc: Link: https://patch.msgid.link/20250428053606.9237-1-joachim.priesner@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/format.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 3b3a5ea6fcbf..f33d25a4e4cc 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -263,7 +263,8 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof } /* Jabra Evolve 65 headset */ - if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + if (chip->usb_id == USB_ID(0x0b0e, 0x030b) || + chip->usb_id == USB_ID(0x0b0e, 0x030c)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); From b364ee98aca9dab1e94c370036d53fb1423e742d Mon Sep 17 00:00:00 2001 From: Dave Chen Date: Tue, 15 Apr 2025 14:33:42 +0800 Subject: [PATCH 004/129] btrfs: fix COW handling in run_delalloc_nocow() commit be3f1938d3e6ea8186f0de3dd95245dda4f22c1e upstream. In run_delalloc_nocow(), when the found btrfs_key's offset > cur_offset, it indicates a gap between the current processing region and the next file extent. The original code would directly jump to the "must_cow" label, which increments the slot and forces a fallback to COW. This behavior might skip an extent item and result in an overestimated COW fallback range. This patch modifies the logic so that when a gap is detected: - If no COW range is already being recorded (cow_start is unset), cow_start is set to cur_offset. - cur_offset is then advanced to the beginning of the next extent. - Instead of jumping to "must_cow", control flows directly to "next_slot" so that the same extent item can be reexamined properly. The change ensures that we accurately account for the extent gap and avoid accidentally extending the range that needs to fallback to COW. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Filipe Manana Signed-off-by: Dave Chen Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11c4d69177f0..48d257923672 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2058,12 +2058,13 @@ next_slot: /* * If the found extent starts after requested offset, then - * adjust extent_end to be right before this extent begins + * adjust cur_offset to be right before this extent begins. */ if (found_key.offset > cur_offset) { - extent_end = found_key.offset; - extent_type = 0; - goto must_cow; + if (cow_start == (u64)-1) + cow_start = cur_offset; + cur_offset = found_key.offset; + goto next_slot; } /* From 94808275aa6c99a3506064dd62e25228050287db Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Apr 2025 17:25:12 +0100 Subject: [PATCH 005/129] drm/fdinfo: Protect against driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5b1834d6202f86180e451ad1a2a8a193a1da18fc upstream. If we unbind a driver from the PCI device with an active DRM client, subsequent read of the fdinfo data associated with the file descriptor in question will not end well. Protect the path with a drm_dev_enter/exit() pair. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Umesh Nerlige Ramappa Reviewed-by: Christian König Fixes: 3f09a0cd4ea3 ("drm: Add common fdinfo helper") Cc: # v6.5+ Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250418162512.72324-1-tvrtko.ursulin@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 1d22dba69b27..b943221b238f 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -1015,6 +1015,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -1029,6 +1033,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); From 126f5c6e0cb84e5c6f7a3a856d799d85668fb38e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 15 Apr 2025 14:19:00 +0200 Subject: [PATCH 006/129] drm/nouveau: Fix WARN_ON in nouveau_fence_context_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bbe5679f30d7690a9b6838a583b9690ea73fe0e9 upstream. Nouveau is mostly designed in a way that it's expected that fences only ever get signaled through nouveau_fence_signal(). However, in at least one other place, nouveau_fence_done(), can signal fences, too. If that happens (race) a signaled fence remains in the pending list for a while, until it gets removed by nouveau_fence_update(). Should nouveau_fence_context_kill() run in the meantime, this would be a bug because the function would attempt to set an error code on an already signaled fence. Have nouveau_fence_context_kill() check for a fence being signaled. Cc: stable@vger.kernel.org # v5.10+ Fixes: ea13e5abf807 ("drm/nouveau: signal pending fences when channel has been killed") Suggested-by: Christian König Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250415121900.55719-3-phasta@kernel.org Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 03eacb22648e..1bfa312d6fb8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) From 726641633413c5f182a541ead42d68bb82e0f677 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:39 -0700 Subject: [PATCH 007/129] EDAC/altera: Test the correct error reg offset commit 4fb7b8fceb0beebbe00712c3daf49ade0386076a upstream. Test correct structure member, ecc_cecnt_offset, before using it. [ bp: Massage commit message. ] Fixes: 73bcc942f427 ("EDAC, altera: Add Arria10 EDAC support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-2-matthew.gerlach@altera.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/altera_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 8b31cd54bdb6..3d575aecbb42 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -98,7 +98,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, From 78cf306f7214d80025b4782d8d0a2b1901abdf0e Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:40 -0700 Subject: [PATCH 008/129] EDAC/altera: Set DDR and SDMMC interrupt mask before registration commit 6dbe3c5418c4368e824bff6ae4889257dd544892 upstream. Mask DDR and SDMMC in probe function to avoid spurious interrupts before registration. Removed invalid register write to system manager. Fixes: 1166fde93d5b ("EDAC, altera: Add Arria10 ECC memory init functions") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-3-matthew.gerlach@altera.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/altera_edac.c | 7 ++++--- drivers/edac/altera_edac.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3d575aecbb42..e04fd1a7e9aa 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1015,9 +1015,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -2138,6 +2135,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e..7248d24c4908 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 From ddf4e7f0f509e6af8d79a6d17ab6168edae1a480 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 21 Apr 2025 14:23:41 +0800 Subject: [PATCH 009/129] i2c: imx-lpi2c: Fix clock count when probe defers commit b1852c5de2f2a37dd4462f7837c9e3e678f9e546 upstream. Deferred probe with pm_runtime_put() may delay clock disable, causing incorrect clock usage count. Use pm_runtime_put_sync() to ensure the clock is disabled immediately. Fixes: 13d6eb20fc79 ("i2c: imx-lpi2c: add runtime pm support") Signed-off-by: Clark Wang Signed-off-by: Carlos Song Cc: # v4.16+ Link: https://lore.kernel.org/r/20250421062341.2471922-1-carlos.song@nxp.com Signed-off-by: Andi Shyti Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 5d4f04a3c6d3..b44b36bd565e 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -616,9 +616,9 @@ static int lpi2c_imx_probe(struct platform_device *pdev) return 0; rpm_disable: - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } From 3821cae9bd5a99a42d3d0be1b58e41f072cd4c4c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 May 2025 11:47:47 +0100 Subject: [PATCH 010/129] arm64: errata: Add missing sentinels to Spectre-BHB MIDR arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fee4d171451c1ad9e8aaf65fc0ab7d143a33bd72 upstream. Commit a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") added some additional CPUs to the Spectre-BHB workaround, including some new arrays for designs that require new 'k' values for the workaround to be effective. Unfortunately, the new arrays omitted the sentinel entry and so is_midr_in_range_list() will walk off the end when it doesn't find a match. With UBSAN enabled, this leads to a crash during boot when is_midr_in_range_list() is inlined (which was more common prior to c8c2647e69be ("arm64: Make  _midr_in_range_list() an exported function")): | Internal error: aarch64 BRK: 00000000f2000001 [#1] PREEMPT SMP | pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : spectre_bhb_loop_affected+0x28/0x30 | lr : is_spectre_bhb_affected+0x170/0x190 | [...] | Call trace: | spectre_bhb_loop_affected+0x28/0x30 | update_cpu_capabilities+0xc0/0x184 | init_cpu_features+0x188/0x1a4 | cpuinfo_store_boot_cpu+0x4c/0x60 | smp_prepare_boot_cpu+0x38/0x54 | start_kernel+0x8c/0x478 | __primary_switched+0xc8/0xd4 | Code: 6b09011f 54000061 52801080 d65f03c0 (d4200020) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: aarch64 BRK: Fatal exception Add the missing sentinel entries. Cc: Lee Jones Cc: James Morse Cc: Doug Anderson Cc: Shameer Kolothum Cc: Reported-by: Greg Kroah-Hartman Fixes: a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") Signed-off-by: Will Deacon Reviewed-by: Lee Jones Reviewed-by: Douglas Anderson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250501104747.28431-1-will@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/proton-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index ecfbff6991bb..edc4c727783d 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -879,10 +879,12 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), From 6a098c51d18ec99485668da44294565c43dbc106 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 3 May 2025 18:24:01 +0200 Subject: [PATCH 011/129] parisc: Fix double SIGFPE crash commit de3629baf5a33af1919dec7136d643b0662e85ef upstream. Camm noticed that on parisc a SIGFPE exception will crash an application with a second SIGFPE in the signal handler. Dave analyzed it, and it happens because glibc uses a double-word floating-point store to atomically update function descriptors. As a result of lazy binding, we hit a floating-point store in fpe_func almost immediately. When the T bit is set, an assist exception trap occurs when when the co-processor encounters *any* floating-point instruction except for a double store of register %fr0. The latter cancels all pending traps. Let's fix this by clearing the Trap (T) bit in the FP status register before returning to the signal handler in userspace. The issue can be reproduced with this test program: root@parisc:~# cat fpe.c static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main() { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW); return printf("%lf\n",1.7976931348623158E308*1.7976931348623158E308); } root@parisc:~# gcc fpe.c -lm root@parisc:~# ./a.out Floating point exception root@parisc:~# strace -f ./a.out execve("./a.out", ["./a.out"], 0xf9ac7034 /* 20 vars */) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 ... rt_sigaction(SIGFPE, {sa_handler=0x1110a, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0x1078f} --- --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0xf8f21237} --- +++ killed by SIGFPE +++ Floating point exception Signed-off-by: Helge Deller Suggested-by: John David Anglin Reported-by: Camm Maguire Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/parisc/math-emu/driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 6ce427b58836..ecd27b48d61f 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -103,9 +103,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; From 34b6fa11431aef71045ae5a00d90a7d630597eda Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Apr 2025 17:13:55 -0700 Subject: [PATCH 012/129] perf/x86/intel: KVM: Mask PEBS_ENABLE loaded for guest with vCPU's value. commit 58f6217e5d0132a9f14e401e62796916aa055c1b upstream. When generating the MSR_IA32_PEBS_ENABLE value that will be loaded on VM-Entry to a KVM guest, mask the value with the vCPU's desired PEBS_ENABLE value. Consulting only the host kernel's host vs. guest masks results in running the guest with PEBS enabled even when the guest doesn't want to use PEBS. Because KVM uses perf events to proxy the guest virtual PMU, simply looking at exclude_host can't differentiate between events created by host userspace, and events created by KVM on behalf of the guest. Running the guest with PEBS unexpectedly enabled typically manifests as crashes due to a near-infinite stream of #PFs. E.g. if the guest hasn't written MSR_IA32_DS_AREA, the CPU will hit page faults on address '0' when trying to record PEBS events. The issue is most easily reproduced by running `perf kvm top` from before commit 7b100989b4f6 ("perf evlist: Remove __evlist__add_default") (after which, `perf kvm top` effectively stopped using PEBS). The userspace side of perf creates a guest-only PEBS event, which intel_guest_get_msrs() misconstrues a guest-*owned* PEBS event. Arguably, this is a userspace bug, as enabling PEBS on guest-only events simply cannot work, and userspace can kill VMs in many other ways (there is no danger to the host). However, even if this is considered to be bad userspace behavior, there's zero downside to perf/KVM restricting PEBS to guest-owned events. Note, commit 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations") fixed the case where host userspace is profiling KVM *and* userspace, but missed the case where userspace is profiling only KVM. Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Closes: https://lore.kernel.org/all/Z_VUswFkWiTYI0eD@do-x1carbon Reported-by: Seth Forshee Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: "Seth Forshee (DigitalOcean)" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250426001355.1026530-1-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 66d5782df18f..835c9febb6a8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4206,7 +4206,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { From e51dd7126693af7f6682366c926ff678a1d6ff94 Mon Sep 17 00:00:00 2001 From: Vishal Badole Date: Thu, 24 Apr 2025 18:32:48 +0530 Subject: [PATCH 013/129] amd-xgbe: Fix to ensure dependent features are toggled with RX checksum offload commit f04dd30f1bef1ed2e74a4050af6e5e5e3869bac3 upstream. According to the XGMAC specification, enabling features such as Layer 3 and Layer 4 Packet Filtering, Split Header and Virtualized Network support automatically selects the IPC Full Checksum Offload Engine on the receive side. When RX checksum offload is disabled, these dependent features must also be disabled to prevent abnormal behavior caused by mismatched feature dependencies. Ensure that toggling RX checksum offload (disabling or enabling) properly disables or enables all dependent features, maintaining consistent and expected behavior in the network device. Cc: stable@vger.kernel.org Fixes: 1a510ccf5869 ("amd-xgbe: Add support for VXLAN offload capabilities") Signed-off-by: Vishal Badole Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250424130248.428865-1-Vishal.Badole@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 9 +++++++-- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 24 +++++++++++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 11 +++++++++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 4 ++++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 230726d7b74f..d41b58fad37b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -373,8 +373,13 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata, } /* Set up the header page info */ - xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, - XGBE_SKB_ALLOC_SIZE); + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + XGBE_SKB_ALLOC_SIZE); + } else { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + pdata->rx_buf_size); + } /* Set up the buffer page info */ xgbe_set_buffer_data(&rdata->rx.buf, &ring->rx_buf_pa, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f393228d41c7..f1b0fb02b3cd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -320,6 +320,18 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } +static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) + break; + + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); + } +} + static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, unsigned int index, unsigned int val) { @@ -3545,8 +3557,12 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); xgbe_config_tso_mode(pdata); - xgbe_config_sph_mode(pdata); - xgbe_config_rss(pdata); + + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_config_sph_mode(pdata); + xgbe_config_rss(pdata); + } + desc_if->wrapper_tx_desc_init(pdata); desc_if->wrapper_rx_desc_init(pdata); xgbe_enable_dma_interrupts(pdata); @@ -3702,5 +3718,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->disable_vxlan = xgbe_disable_vxlan; hw_if->set_vxlan_id = xgbe_set_vxlan_id; + /* For Split Header*/ + hw_if->enable_sph = xgbe_config_sph_mode; + hw_if->disable_sph = xgbe_disable_sph_mode; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 6b73648b3779..34d45cebefb5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2257,10 +2257,17 @@ static int xgbe_set_features(struct net_device *netdev, if (ret) return ret; - if ((features & NETIF_F_RXCSUM) && !rxcsum) + if ((features & NETIF_F_RXCSUM) && !rxcsum) { + hw_if->enable_sph(pdata); + hw_if->enable_vxlan(pdata); hw_if->enable_rx_csum(pdata); - else if (!(features & NETIF_F_RXCSUM) && rxcsum) + schedule_work(&pdata->restart_work); + } else if (!(features & NETIF_F_RXCSUM) && rxcsum) { + hw_if->disable_sph(pdata); + hw_if->disable_vxlan(pdata); hw_if->disable_rx_csum(pdata); + schedule_work(&pdata->restart_work); + } if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan) hw_if->enable_rx_vlan_stripping(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ad136ed493ed..173f4dad470f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -865,6 +865,10 @@ struct xgbe_hw_if { void (*enable_vxlan)(struct xgbe_prv_data *); void (*disable_vxlan)(struct xgbe_prv_data *); void (*set_vxlan_id)(struct xgbe_prv_data *); + + /* For Split Header */ + void (*enable_sph)(struct xgbe_prv_data *pdata); + void (*disable_sph)(struct xgbe_prv_data *pdata); }; /* This structure represents implementation specific routines for an From dfbaecf7e38f5e9bfa5e47a1e525ffbb58bab8cf Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 2 May 2025 13:22:28 +0200 Subject: [PATCH 014/129] irqchip/qcom-mpm: Prevent crash when trying to handle non-wake GPIOs commit 38a05c0b87833f5b188ae43b428b1f792df2b384 upstream. On Qualcomm chipsets not all GPIOs are wakeup capable. Those GPIOs do not have a corresponding MPM pin and should not be handled inside the MPM driver. The IRQ domain hierarchy is always applied, so it's required to explicitly disconnect the hierarchy for those. The pinctrl-msm driver marks these with GPIO_NO_WAKE_IRQ. qcom-pdc has a check for this, but irq-qcom-mpm is currently missing the check. This is causing crashes when setting up interrupts for non-wake GPIOs: root@rb1:~# gpiomon -c gpiochip1 10 irq: IRQ159: trimming hierarchy from :soc@0:interrupt-controller@f200000-1 Unable to handle kernel paging request at virtual address ffff8000a1dc3820 Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pc : mpm_set_type+0x80/0xcc lr : mpm_set_type+0x5c/0xcc Call trace: mpm_set_type+0x80/0xcc (P) qcom_mpm_set_type+0x64/0x158 irq_chip_set_type_parent+0x20/0x38 msm_gpio_irq_set_type+0x50/0x530 __irq_set_trigger+0x60/0x184 __setup_irq+0x304/0x6bc request_threaded_irq+0xc8/0x19c edge_detector_setup+0x260/0x364 linereq_create+0x420/0x5a8 gpio_ioctl+0x2d4/0x6c0 Fix this by copying the check for GPIO_NO_WAKE_IRQ from qcom-pdc.c, so that MPM is removed entirely from the hierarchy for non-wake GPIOs. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Alexey Klimov Signed-off-by: Stephan Gerhold Signed-off-by: Thomas Gleixner Tested-by: Alexey Klimov Reviewed-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250502-irq-qcom-mpm-fix-no-wake-v1-1-8a1eafcd28d4@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-qcom-mpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 7124565234a5..0807e4aca933 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -226,6 +226,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + if (pin == GPIO_NO_WAKE_IRQ) + return irq_domain_disconnect_hierarchy(domain, virq); + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, &qcom_mpm_chip, priv); if (ret) From c0fabecd65103550a1967c236d7c483d3c5368e1 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:46 +0000 Subject: [PATCH 015/129] mm/memblock: pass size instead of end to memblock_set_node() commit 06eaa824fd239edd1eab2754f29b2d03da313003 upstream. The second parameter of memblock_set_node() is size instead of end. Since it iterates from lower address to higher address, finally the node id is correct. But during the process, some of them are wrong. Pass size instead of end. Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng CC: stable@vger.kernel.org Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250318071948.23854-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Greg Kroah-Hartman --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index e8a2a1537d6a..a592ad619b2a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2135,7 +2135,7 @@ static void __init memmap_init_reserved_pages(void) if (memblock_is_nomap(region)) reserve_bootmem_region(start, end, nid); - memblock_set_node(start, end, &memblock.reserved, nid); + memblock_set_node(start, region->size, &memblock.reserved, nid); } /* initialize struct pages for the reserved regions */ From 4d1a2d1363563c84820ba0503519f3567f229c10 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:47 +0000 Subject: [PATCH 016/129] mm/memblock: repeat setting reserved region nid if array is doubled commit eac8ea8736ccc09513152d970eb2a42ed78e87e8 upstream. Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") introduce a way to set nid to all reserved region. But there is a corner case it will leave some region with invalid nid. When memblock_set_node() doubles the array of memblock.reserved, it may lead to a new reserved region before current position. The new region will be left with an invalid node id. Repeat the process when detecting it. Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318071948.23854-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Greg Kroah-Hartman --- mm/memblock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index a592ad619b2a..047dce35cf6e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2122,11 +2122,14 @@ static void __init memmap_init_reserved_pages(void) struct memblock_region *region; phys_addr_t start, end; int nid; + unsigned long max_reserved; /* * set nid on all reserved pages and also treat struct * pages for the NOMAP regions as PageReserved */ +repeat: + max_reserved = memblock.reserved.max; for_each_mem_region(region) { nid = memblock_get_region_node(region); start = region->base; @@ -2137,6 +2140,13 @@ static void __init memmap_init_reserved_pages(void) memblock_set_node(start, region->size, &memblock.reserved, nid); } + /* + * 'max' is changed means memblock.reserved has been doubled its + * array, which may result a new reserved region before current + * 'start'. Now we should repeat the procedure to set its node id. + */ + if (max_reserved != memblock.reserved.max) + goto repeat; /* initialize struct pages for the reserved regions */ for_each_reserved_mem_region(region) { From 3aa4aff158ea0d4120d8e2d6571a7b9e6de91701 Mon Sep 17 00:00:00 2001 From: Ruslan Piasetskyi Date: Wed, 26 Mar 2025 23:06:38 +0100 Subject: [PATCH 017/129] mmc: renesas_sdhi: Fix error handling in renesas_sdhi_probe commit 649b50a82f09fa44c2f7a65618e4584072145ab7 upstream. After moving tmio_mmc_host_probe down, error handling has to be adjusted. Fixes: 74f45de394d9 ("mmc: renesas_sdhi: register irqs before registering controller") Reviewed-by: Ihar Salauyou Signed-off-by: Ruslan Piasetskyi Reviewed-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250326220638.460083-1-ruslan.piasetskyi@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/renesas_sdhi_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index c675dec587ef..597b00e8c953 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1107,26 +1107,26 @@ int renesas_sdhi_probe(struct platform_device *pdev, num_irqs = platform_irq_count(pdev); if (num_irqs < 0) { ret = num_irqs; - goto eirq; + goto edisclk; } /* There must be at least one IRQ source */ if (!num_irqs) { ret = -ENXIO; - goto eirq; + goto edisclk; } for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - goto eirq; + goto edisclk; } ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) - goto eirq; + goto edisclk; } ret = tmio_mmc_host_probe(host); @@ -1138,8 +1138,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, return ret; -eirq: - tmio_mmc_host_remove(host); edisclk: renesas_sdhi_clk_disable(host); efree: From 75fda0e591680f863ba33bc8e639e5840d1caca6 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Wed, 23 Apr 2025 21:03:03 -0500 Subject: [PATCH 018/129] spi: tegra114: Don't fail set_cs_timing when delays are zero commit 4426e6b4ecf632bb75d973051e1179b8bfac2320 upstream. The original code would skip null delay pointers, but when the pointers were converted to point within the spi_device struct, the check was not updated to skip delays of zero. Hence all spi devices that didn't set delays would fail to probe. Fixes: 04e6bb0d6bb1 ("spi: modify set_cs_timing parameter") Cc: stable@vger.kernel.org Signed-off-by: Aaron Kling Link: https://patch.msgid.link/20250423-spi-tegra114-v1-1-2d608bcc12f9@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-tegra114.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 460f232dad50..147d7052794f 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if (setup->unit != SPI_DELAY_UNIT_SCK || - hold->unit != SPI_DELAY_UNIT_SCK || - inactive->unit != SPI_DELAY_UNIT_SCK) { + if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); From 8daa71c694b3d1e187e44f8a1acb18a73e2ed324 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 22:41:28 -0400 Subject: [PATCH 019/129] tracing: Do not take trace_event_sem in print_event_fields() commit 0a8f11f8569e7ed16cbcedeb28c4350f6378fea6 upstream. On some paths in print_event_fields() it takes the trace_event_sem for read, even though it should always be held when the function is called. Remove the taking of that mutex and add a lockdep_assert_held_read() to make sure the trace_event_sem is held when print_event_fields() is called. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501224128.0b1f0571@batman.local.home Fixes: 80a76994b2d88 ("tracing: Add "fields" option to show raw trace event fields") Reported-by: syzbot+441582c1592938fccf09@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6813ff5e.050a0220.14dd7d.001b.GAE@google.com/ Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 2b948d35fb59..448ee37ae245 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -950,11 +950,12 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, struct trace_event_call *call; struct list_head *head; + lockdep_assert_held_read(&trace_event_sem); + /* ftrace defined events have separate call structures */ if (event->type <= __TRACE_LAST_TYPE) { bool found = false; - down_read(&trace_event_sem); list_for_each_entry(call, &ftrace_events, list) { if (call->event.type == event->type) { found = true; @@ -964,7 +965,6 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, if (call->event.type > __TRACE_LAST_TYPE) break; } - up_read(&trace_event_sem); if (!found) { trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); goto out; From 08424a0922fb9e32a19b09d852ee87fb6c497538 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 22 Apr 2025 12:22:02 +0800 Subject: [PATCH 020/129] wifi: brcm80211: fmac: Add error handling for brcmf_usb_dl_writeimage() commit 8e089e7b585d95122c8122d732d1d5ef8f879396 upstream. The function brcmf_usb_dl_writeimage() calls the function brcmf_usb_dl_cmd() but dose not check its return value. The 'state.state' and the 'state.bytes' are uninitialized if the function brcmf_usb_dl_cmd() fails. It is dangerous to use uninitialized variables in the conditions. Add error handling for brcmf_usb_dl_cmd() to jump to error handling path if the brcmf_usb_dl_cmd() fails and the 'state.state' and the 'state.bytes' are uninitialized. Improve the error message to report more detailed error information. Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Wentao Liang Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250422042203.2259-1-vulab@iscas.ac.cn Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 2178675ae1a4..6f64a05debd2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -903,14 +903,16 @@ brcmf_usb_dl_writeimage(struct brcmf_usbdev_info *devinfo, u8 *fw, int fwlen) } /* 1) Prepare USB boot loader for runtime image */ - brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + err = brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + if (err) + goto fail; rdlstate = le32_to_cpu(state.state); rdlbytes = le32_to_cpu(state.bytes); /* 2) Check we are in the Waiting state */ if (rdlstate != DL_WAITING) { - brcmf_err("Failed to DL_START\n"); + brcmf_err("Invalid DL state: %u\n", rdlstate); err = -EINVAL; goto fail; } From c8c83052283bcf2fdd467a33d1d2bd5ba36e935a Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 17 Apr 2025 11:07:38 +0800 Subject: [PATCH 021/129] dm-bufio: don't schedule in atomic context commit a3d8f0a7f5e8b193db509c7191fefeed3533fc44 upstream. A BUG was reported as below when CONFIG_DEBUG_ATOMIC_SLEEP and try_verify_in_tasklet are enabled. [ 129.444685][ T934] BUG: sleeping function called from invalid context at drivers/md/dm-bufio.c:2421 [ 129.444723][ T934] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 934, name: kworker/1:4 [ 129.444740][ T934] preempt_count: 201, expected: 0 [ 129.444756][ T934] RCU nest depth: 0, expected: 0 [ 129.444781][ T934] Preemption disabled at: [ 129.444789][ T934] [] shrink_work+0x21c/0x248 [ 129.445167][ T934] kernel BUG at kernel/sched/walt/walt_debug.c:16! [ 129.445183][ T934] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 129.445204][ T934] Skip md ftrace buffer dump for: 0x1609e0 [ 129.447348][ T934] CPU: 1 PID: 934 Comm: kworker/1:4 Tainted: G W OE 6.6.56-android15-8-o-g6f82312b30b9-debug #1 1400000003000000474e5500b3187743670464e8 [ 129.447362][ T934] Hardware name: Qualcomm Technologies, Inc. Parrot QRD, Alpha-M (DT) [ 129.447373][ T934] Workqueue: dm_bufio_cache shrink_work [ 129.447394][ T934] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 129.447406][ T934] pc : android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug] [ 129.447435][ T934] lr : __traceiter_android_rvh_schedule_bug+0x44/0x6c [ 129.447451][ T934] sp : ffffffc0843dbc90 [ 129.447459][ T934] x29: ffffffc0843dbc90 x28: ffffffffffffffff x27: 0000000000000c8b [ 129.447479][ T934] x26: 0000000000000040 x25: ffffff804b3d6260 x24: ffffffd816232b68 [ 129.447497][ T934] x23: ffffff805171c5b4 x22: 0000000000000000 x21: ffffffd816231900 [ 129.447517][ T934] x20: ffffff80306ba898 x19: 0000000000000000 x18: ffffffc084159030 [ 129.447535][ T934] x17: 00000000d2b5dd1f x16: 00000000d2b5dd1f x15: ffffffd816720358 [ 129.447554][ T934] x14: 0000000000000004 x13: ffffff89ef978000 x12: 0000000000000003 [ 129.447572][ T934] x11: ffffffd817a823c4 x10: 0000000000000202 x9 : 7e779c5735de9400 [ 129.447591][ T934] x8 : ffffffd81560d004 x7 : 205b5d3938373434 x6 : ffffffd8167397c8 [ 129.447610][ T934] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffffffc0843db9e0 [ 129.447629][ T934] x2 : 0000000000002f15 x1 : 0000000000000000 x0 : 0000000000000000 [ 129.447647][ T934] Call trace: [ 129.447655][ T934] android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug 1400000003000000474e550080cce8a8a78606b6] [ 129.447681][ T934] __might_resched+0x190/0x1a8 [ 129.447694][ T934] shrink_work+0x180/0x248 [ 129.447706][ T934] process_one_work+0x260/0x624 [ 129.447718][ T934] worker_thread+0x28c/0x454 [ 129.447729][ T934] kthread+0x118/0x158 [ 129.447742][ T934] ret_from_fork+0x10/0x20 [ 129.447761][ T934] Code: ???????? ???????? ???????? d2b5dd1f (d4210000) [ 129.447772][ T934] ---[ end trace 0000000000000000 ]--- dm_bufio_lock will call spin_lock_bh when try_verify_in_tasklet is enabled, and __scan will be called in atomic context. Fixes: 7cd326747f46 ("dm bufio: remove dm_bufio_cond_resched()") Signed-off-by: LongPing Wei Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 30ddfb21f658..2d3afeaf8868 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -68,6 +68,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2387,7 +2389,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } From ecc7f159d17e6f15a67f5fee98f17575dcfcbb3d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 22 Apr 2025 21:18:33 +0200 Subject: [PATCH 022/129] dm-integrity: fix a warning on invalid table line commit 0a533c3e4246c29d502a7e0fba0e86d80a906b04 upstream. If we use the 'B' mode and we have an invalit table line, cancel_delayed_work_sync would trigger a warning. This commit avoids the warning. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index eb2b44f4a61f..1e27a5bce2d9 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4687,7 +4687,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); From 64e95bb37916ab03dcb7a920276c5a52df8e568b Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 15 Apr 2025 00:17:16 -0400 Subject: [PATCH 023/129] dm: always update the array size in realloc_argv on success commit 5a2a6c428190f945c5cbf5791f72dbea83e97f66 upstream. realloc_argv() was only updating the array size if it was called with old_argv already allocated. The first time it was called to create an argv array, it would allocate the array but return the array size as zero. dm_split_args() would think that it couldn't store any arguments in the array and would call realloc_argv() again, causing it to reallocate the initial slots (this time using GPF_KERNEL) and finally return a size. Aside from being wasteful, this could cause deadlocks on targets that need to process messages without starting new IO. Instead, realloc_argv should always update the allocated array size on success. Fixes: a0651926553c ("dm table: don't copy from a NULL pointer in realloc_argv()") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index fd84e06670e8..c32ae18a8ca3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -500,9 +500,10 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { *size = new_size; + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); } kfree(old_argv); From 13d67528e1ae4486e9ab24b70122fab104c73c29 Mon Sep 17 00:00:00 2001 From: Pavel Paklov Date: Tue, 25 Mar 2025 09:22:44 +0000 Subject: [PATCH 024/129] iommu/amd: Fix potential buffer overflow in parse_ivrs_acpihid commit 8dee308e4c01dea48fc104d37f92d5b58c50b96c upstream. There is a string parsing logic error which can lead to an overflow of hid or uid buffers. Comparing ACPIID_LEN against a total string length doesn't take into account the lengths of individual hid and uid buffers so the check is insufficient in some cases. For example if the length of hid string is 4 and the length of the uid string is 260, the length of str will be equal to ACPIID_LEN + 1 but uid string will overflow uid buffer which size is 256. The same applies to the hid string with length 13 and uid string with length 250. Check the length of hid and uid strings separately to prevent buffer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Pavel Paklov Link: https://lore.kernel.org/r/20250325092259.392844-1-Pavel.Paklov@cyberprotect.ru Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index ef3fae113dd6..2e7a12f30651 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3682,6 +3682,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From f39493cf70236d8e5c4a55df5aedc1aaad17df62 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Fri, 18 Apr 2025 11:16:42 +0800 Subject: [PATCH 025/129] iommu/vt-d: Apply quirk_iommu_igfx for 8086:0044 (QM57/QS57) commit 2c8a7c66c90832432496616a9a3c07293f1364f3 upstream. On the Lenovo ThinkPad X201, when Intel VT-d is enabled in the BIOS, the kernel boots with errors related to DMAR, the graphical interface appeared quite choppy, and the system resets erratically within a minute after it booted: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:02.0] fault addr 0xb97ff000 [fault reason 0x05] PTE Write access is not set Upon comparing boot logs with VT-d on/off, I found that the Intel Calpella quirk (`quirk_calpella_no_shadow_gtt()') correctly applied the igfx IOMMU disable/quirk correctly: pci 0000:00:00.0: DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics Whereas with VT-d on, it went into the "else" branch, which then triggered the DMAR handling fault above: ... else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); } Now, this is not exactly scientific, but moving 0x0044 to quirk_iommu_igfx seems to have fixed the aforementioned issue. Running a few `git blame' runs on the function, I have found that the quirk was originally introduced as a fix specific to ThinkPad X201: commit 9eecabcb9a92 ("intel-iommu: Abort IOMMU setup for igfx if BIOS gave no shadow GTT space") Which was later revised twice to the "else" branch we saw above: - 2011: commit 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") - 2024: commit ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") I'm uncertain whether further testings on this particular laptops were done in 2011 and (honestly I'm not sure) 2024, but I would be happy to do some distro-specific testing if that's what would be required to verify this patch. P.S., I also see IDs 0x0040, 0x0062, and 0x006a listed under the same `quirk_calpella_no_shadow_gtt()' quirk, but I'm not sure how similar these chipsets are (if they share the same issue with VT-d or even, indeed, if this issue is specific to a bug in the Lenovo BIOS). With regards to 0x0062, it seems to be a Centrino wireless card, but not a chipset? I have also listed a couple (distro and kernel) bug reports below as references (some of them are from 7-8 years ago!), as they seem to be similar issue found on different Westmere/Ironlake, Haswell, and Broadwell hardware setups. Cc: stable@vger.kernel.org Fixes: 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") Fixes: ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") Link: https://groups.google.com/g/qubes-users/c/4NP4goUds2c?pli=1 Link: https://bugs.archlinux.org/task/65362 Link: https://bbs.archlinux.org/viewtopic.php?id=230323 Reported-by: Wenhao Sun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=197029 Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/r/20250415133330.12528-1-jeffbai@aosc.io Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d6381c00bb8d..6a745616d85a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4855,6 +4855,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4932,7 +4935,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); From 3a868a7f8da4775391c09fcd24898fcb2beb4545 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Apr 2025 11:24:00 -0500 Subject: [PATCH 026/129] platform/x86/amd: pmc: Require at least 2.5 seconds between HW sleep cycles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9f5595d5f03fd4dc640607a71e89a1daa68fd19d upstream. When an APU exits HW sleep with no active wake sources the Linux kernel will rapidly assert that the APU can enter back into HW sleep. This happens in a few ms. Contrasting this to Windows, Windows can take 10s of seconds to enter back into the resiliency phase for Modern Standby. For some situations this can be problematic because it can cause leakage from VDDCR_SOC to VDD_MISC and force VDD_MISC outside of the electrical design guide specifications. On some designs this will trip the over voltage protection feature (OVP) of the voltage regulator module, but it could cause APU damage as well. To prevent this risk, add an explicit sleep call so that future attempts to enter into HW sleep will have enough time to settle. This will occur while the screen is dark and only on cases that the APU should enter HW sleep again, so it shouldn't be noticeable to any user. Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250414162446.3853194-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/amd/pmc/pmc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index 70907e8f3ea9..946a546cd9dd 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -823,10 +823,9 @@ static void amd_pmc_s2idle_check(void) struct smu_metrics table; int rc; - /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ - if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && - table.s0i3_last_entry_status) - usleep_range(10000, 20000); + /* Avoid triggering OVP */ + if (!get_metrics_table(pdev, &table) && table.s0i3_last_entry_status) + msleep(2500); /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); From a45445b6093d4ffd83bb42da677c29959576fa59 Mon Sep 17 00:00:00 2001 From: Shouye Liu Date: Thu, 17 Apr 2025 11:23:21 +0800 Subject: [PATCH 027/129] platform/x86/intel-uncore-freq: Fix missing uncore sysfs during CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8d6955ed76e8a47115f2ea1d9c263ee6f505d737 upstream. In certain situations, the sysfs for uncore may not be present when all CPUs in a package are offlined and then brought back online after boot. This issue can occur if there is an error in adding the sysfs entry due to a memory allocation failure. Retrying to bring the CPUs online will not resolve the issue, as the uncore_cpu_mask is already set for the package before the failure condition occurs. This issue does not occur if the failure happens during module initialization, as the module will fail to load in the event of any error. To address this, ensure that the uncore_cpu_mask is not set until the successful return of uncore_freq_add_entry(). Fixes: dbce412a7733 ("platform/x86/intel-uncore-freq: Split common and enumeration part") Signed-off-by: Shouye Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250417032321.75580-1-shouyeliu@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- .../x86/intel/uncore-frequency/uncore-frequency.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index a3b25253b6fd..2c9c5cc7d854 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -121,15 +121,13 @@ static int uncore_event_cpu_online(unsigned int cpu) { struct uncore_data *data; int target; + int ret; /* Check if there is an online cpu in the package for uncore MSR */ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; - /* Use this CPU on this die as a control CPU */ - cpumask_set_cpu(cpu, &uncore_cpu_mask); - data = uncore_get_instance(cpu); if (!data) return 0; @@ -138,7 +136,14 @@ static int uncore_event_cpu_online(unsigned int cpu) data->die_id = topology_die_id(cpu); data->domain_id = UNCORE_DOMAIN_ID_INVALID; - return uncore_freq_add_entry(data, cpu); + ret = uncore_freq_add_entry(data, cpu); + if (ret) + return ret; + + /* Use this CPU on this die as a control CPU */ + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + return 0; } static int uncore_event_cpu_offline(unsigned int cpu) From b447463562238428503cfba1c913261047772f90 Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Sat, 19 Apr 2025 19:59:28 +0100 Subject: [PATCH 028/129] ksmbd: fix use-after-free in kerberos authentication commit e86e9134e1d1c90a960dd57f59ce574d27b9a124 upstream. Setting sess->user = NULL was introduced to fix the dangling pointer created by ksmbd_free_user. However, it is possible another thread could be operating on the session and make use of sess->user after it has been passed to ksmbd_free_user but before sess->user is set to NULL. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/auth.c | 14 +++++++++++++- fs/smb/server/smb2pdu.c | 5 ----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 5345d2417c7f..f4b20b80af06 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -546,7 +546,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, retval = -ENOMEM; goto out; } - sess->user = user; + + if (!sess->user) { + /* First successful authentication */ + sess->user = user; + } else { + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_debug(AUTH, "different user tried to reuse session\n"); + retval = -EPERM; + ksmbd_free_user(user); + goto out; + } + ksmbd_free_user(user); + } memcpy(sess->sess_key, resp->payload, resp->session_key_len); memcpy(out_blob, resp->payload + resp->session_key_len, diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d41d67ec5ee5..13750a5e5ba0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1599,11 +1599,6 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { From e76948644ec7b84658fc7d8bad5e9bf9db7386bb Mon Sep 17 00:00:00 2001 From: Jethro Donaldson Date: Wed, 30 Apr 2025 00:59:15 +1200 Subject: [PATCH 029/129] smb: client: fix zero length for mkdir POSIX create context commit 74c72419ec8da5cbc9c49410d3c44bb954538bdd upstream. SMB create requests issued via smb311_posix_mkdir() have an incorrect length of zero bytes for the POSIX create context data. ksmbd server rejects such requests and logs "cli req too short" causing mkdir to fail with "invalid argument" on the client side. It also causes subsequent rmmod to crash in cifs_destroy_request_bufs() Inspection of packets sent by cifs.ko using wireshark show valid data for the SMB2_POSIX_CREATE_CONTEXT is appended with the correct offset, but with an incorrect length of zero bytes. Fails with ksmbd+cifs.ko only as Windows server/client does not use POSIX extensions. Fix smb311_posix_mkdir() to set req->CreateContextsLength as part of appending the POSIX creation context to the request. Signed-off-by: Jethro Donaldson Acked-by: Paulo Alcantara (Red Hat) Reviewed-by: Namjae Jeon Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0af3535e08f3..4536b6fcfa02 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2932,6 +2932,7 @@ replay_again: req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len); pc_buf = iov[n_iov-1].iov_base; } From cbd54987db25aceea41ddf8ebe6c8fd90f643f35 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Apr 2025 16:12:37 +0200 Subject: [PATCH 030/129] cpufreq: Avoid using inconsistent policy->min and policy->max commit 7491cdf46b5cbdf123fc84fbe0a07e9e3d7b7620 upstream. Since cpufreq_driver_resolve_freq() can run in parallel with cpufreq_set_policy() and there is no synchronization between them, the former may access policy->min and policy->max while the latter is updating them and it may see intermediate values of them due to the way the update is carried out. Also the compiler is free to apply any optimizations it wants both to the stores in cpufreq_set_policy() and to the loads in cpufreq_driver_resolve_freq() which may result in additional inconsistencies. To address this, use WRITE_ONCE() when updating policy->min and policy->max in cpufreq_set_policy() and use READ_ONCE() for reading them in cpufreq_driver_resolve_freq(). Moreover, rearrange the update in cpufreq_set_policy() to avoid storing intermediate values in policy->min and policy->max with the help of the observation that their new values are expected to be properly ordered upfront. Also modify cpufreq_driver_resolve_freq() to take the possible reverse ordering of policy->min and policy->max, which may happen depending on the ordering of operations when this function and cpufreq_set_policy() run concurrently, into account by always honoring the max when it turns out to be less than the min (in case it comes from thermal throttling or similar). Fixes: 151717690694 ("cpufreq: Make policy min/max hard requirements") Cc: 5.16+ # 5.16+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Acked-by: Viresh Kumar Link: https://patch.msgid.link/5907080.DvuYhMxLoT@rjwysocki.net Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0ac0998152ce..b0ea1da0c148 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -538,8 +538,6 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, { unsigned int idx; - target_freq = clamp_val(target_freq, policy->min, policy->max); - if (!policy->freq_table) return target_freq; @@ -563,7 +561,22 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE); + unsigned int min = READ_ONCE(policy->min); + unsigned int max = READ_ONCE(policy->max); + + /* + * If this function runs in parallel with cpufreq_set_policy(), it may + * read policy->min before the update and policy->max after the update + * or the other way around, so there is no ordering guarantee. + * + * Resolve this by always honoring the max (in case it comes from + * thermal throttling or similar). + */ + if (unlikely(min > max)) + min = max; + + return __resolve_freq(policy, clamp_val(target_freq, min, max), + CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2335,6 +2348,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; + target_freq = clamp_val(target_freq, policy->min, policy->max); target_freq = __resolve_freq(policy, target_freq, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", @@ -2625,11 +2639,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * Resolve policy min/max to available frequencies. It ensures * no frequency resolution will neither overshoot the requested maximum * nor undershoot the requested minimum. + * + * Avoid storing intermediate values in policy->max or policy->min and + * compiler optimizations around them because they may be accessed + * concurrently by cpufreq_driver_resolve_freq() during the update. */ - policy->min = new_data.min; - policy->max = new_data.max; - policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L); - policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); + trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; From 25687fd196337dd26fd8086cc5cff1bae0798103 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Apr 2025 13:36:21 +0200 Subject: [PATCH 031/129] cpufreq: Fix setting policy limits when frequency tables are used commit b79028039f440e7d2c4df6ab243060c4e3803e84 upstream. Commit 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") overlooked the fact that policy->min and policy->max were accessed directly in cpufreq_frequency_table_target() and in the functions called by it. Consequently, the changes made by that commit led to problems with setting policy limits. Address this by passing the target frequency limits to __resolve_freq() and cpufreq_frequency_table_target() and propagating them to the functions called by the latter. Fixes: 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") Cc: 5.16+ # 5.16+ Closes: https://lore.kernel.org/linux-pm/aAplED3IA_J0eZN0@linaro.org/ Reported-by: Stephan Gerhold Signed-off-by: Rafael J. Wysocki Tested-by: Stephan Gerhold Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/5896780.DvuYhMxLoT@rjwysocki.net Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 22 +++++--- drivers/cpufreq/cpufreq_ondemand.c | 3 +- drivers/cpufreq/freq_table.c | 6 +-- include/linux/cpufreq.h | 83 +++++++++++++++++++----------- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b0ea1da0c148..6682f422cadd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -534,14 +534,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; + target_freq = clamp_val(target_freq, min, max); + if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -575,8 +579,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, if (unlikely(min > max)) min = max; - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2348,8 +2351,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2644,8 +2647,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * compiler optimizations around them because they may be accessed * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); trace_cpu_frequency_limits(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c52d19d67557..65cbd5ecbaf8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -77,7 +77,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c17dc51a5a02..94de089b1453 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -116,8 +116,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -148,7 +148,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9ca4211c063f..184a84dd467e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -787,8 +787,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -853,12 +853,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -868,6 +868,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -921,12 +929,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -936,6 +944,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -1006,12 +1022,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1021,7 +1037,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1030,11 +1056,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1045,29 +1073,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } From 056ebbddb8faf4ddf83d005454dd78fc25c2d897 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 22 Apr 2025 20:30:25 +0900 Subject: [PATCH 032/129] tracing: Fix oob write in trace_seq_to_buffer() commit f5178c41bb43444a6008150fe6094497135d07cb upstream. syzbot reported this bug: ================================================================== BUG: KASAN: slab-out-of-bounds in trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] BUG: KASAN: slab-out-of-bounds in tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 Write of size 4507 at addr ffff888032b6b000 by task syz.2.320/7260 CPU: 1 UID: 0 PID: 7260 Comm: syz.2.320 Not tainted 6.15.0-rc1-syzkaller-00301-g3bde70a2c827 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 __asan_memcpy+0x3c/0x60 mm/kasan/shadow.c:106 trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 .... ================================================================== It has been reported that trace_seq_to_buffer() tries to copy more data than PAGE_SIZE to buf. Therefore, to prevent this, we should use the smaller of trace_seq_used(&iter->seq) and PAGE_SIZE as an argument. Link: https://lore.kernel.org/20250422113026.13308-1-aha310510@gmail.com Reported-by: syzbot+c8cd2d2c412b868263fb@syzkaller.appspotmail.com Fixes: 3c56819b14b0 ("tracing: splice support for tracing_pipe") Suggested-by: Steven Rostedt Signed-off-by: Jeongjun Park Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a41c99350a5b..95868c315730 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7027,13 +7027,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } From f0b56f1d2fcc5db6e58c168996c7a28d1ba28e99 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:43 +0800 Subject: [PATCH 033/129] bpf: add find_containing_subprog() utility function commit 27e88bc4df1d80888fe1aaca786a7cc6e69587e2 upstream. Add a utility function, looking for a subprogram containing a given instruction index, rewrite find_subprog() to use this function. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e443506b0a65..f6c0f21be98e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2636,16 +2636,36 @@ static int cmp_subprogs(const void *a, const void *b) ((struct bpf_subprog_info *)b)->start; } +/* Find subprogram that contains instruction at 'off' */ +static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *vals = env->subprog_info; + int l, r, m; + + if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0) + return NULL; + + l = 0; + r = env->subprog_cnt - 1; + while (l < r) { + m = l + (r - l + 1) / 2; + if (vals[m].start <= off) + l = m; + else + r = m - 1; + } + return &vals[l]; +} + +/* Find subprogram that starts exactly at 'off' */ static int find_subprog(struct bpf_verifier_env *env, int off) { struct bpf_subprog_info *p; - p = bsearch(&off, env->subprog_info, env->subprog_cnt, - sizeof(env->subprog_info[0]), cmp_subprogs); - if (!p) + p = find_containing_subprog(env, off); + if (!p || p->start != off) return -ENOENT; return p - env->subprog_info; - } static int add_subprog(struct bpf_verifier_env *env, int off) From 3d496a1eaaec6372c745c3564cd8fe0d11c3220a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:44 +0800 Subject: [PATCH 034/129] bpf: refactor bpf_helper_changes_pkt_data to use helper number commit b238e187b4a2d3b54d80aec05a9cab6466b79dde upstream. Use BPF helper number instead of function pointer in bpf_helper_changes_pkt_data(). This would simplify usage of this function in verifier.c:check_cfg() (in a follow-up patch), where only helper number is easily available and there is no real need to lookup helper proto. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- include/linux/filter.h | 2 +- kernel/bpf/core.c | 2 +- kernel/bpf/verifier.c | 2 +- net/core/filter.c | 61 +++++++++++++++++++----------------------- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 5090e940ba3e..adf65eacade0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -915,7 +915,7 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); -bool bpf_helper_changes_pkt_data(void *func); +bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); static inline bool bpf_dump_raw_ok(const struct cred *cred) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 02f327f05fd6..81fd1bb99416 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2893,7 +2893,7 @@ void __weak bpf_jit_compile(struct bpf_prog *prog) { } -bool __weak bpf_helper_changes_pkt_data(void *func) +bool __weak bpf_helper_changes_pkt_data(enum bpf_func_id func_id) { return false; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f6c0f21be98e..899fa7dcd417 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10007,7 +10007,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } /* With LD_ABS/IND some JITs save/restore skb from r1. */ - changes_data = bpf_helper_changes_pkt_data(fn->func); + changes_data = bpf_helper_changes_pkt_data(func_id); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id); diff --git a/net/core/filter.c b/net/core/filter.c index 39eef3370d80..1127ab198409 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7868,42 +7868,35 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = { #endif /* CONFIG_INET */ -bool bpf_helper_changes_pkt_data(void *func) +bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id) { - if (func == bpf_skb_vlan_push || - func == bpf_skb_vlan_pop || - func == bpf_skb_store_bytes || - func == bpf_skb_change_proto || - func == bpf_skb_change_head || - func == sk_skb_change_head || - func == bpf_skb_change_tail || - func == sk_skb_change_tail || - func == bpf_skb_adjust_room || - func == sk_skb_adjust_room || - func == bpf_skb_pull_data || - func == sk_skb_pull_data || - func == bpf_clone_redirect || - func == bpf_l3_csum_replace || - func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head || - func == bpf_xdp_adjust_meta || - func == bpf_msg_pull_data || - func == bpf_msg_push_data || - func == bpf_msg_pop_data || - func == bpf_xdp_adjust_tail || -#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) - func == bpf_lwt_seg6_store_bytes || - func == bpf_lwt_seg6_adjust_srh || - func == bpf_lwt_seg6_action || -#endif -#ifdef CONFIG_INET - func == bpf_sock_ops_store_hdr_opt || -#endif - func == bpf_lwt_in_push_encap || - func == bpf_lwt_xmit_push_encap) + switch (func_id) { + case BPF_FUNC_clone_redirect: + case BPF_FUNC_l3_csum_replace: + case BPF_FUNC_l4_csum_replace: + case BPF_FUNC_lwt_push_encap: + case BPF_FUNC_lwt_seg6_action: + case BPF_FUNC_lwt_seg6_adjust_srh: + case BPF_FUNC_lwt_seg6_store_bytes: + case BPF_FUNC_msg_pop_data: + case BPF_FUNC_msg_pull_data: + case BPF_FUNC_msg_push_data: + case BPF_FUNC_skb_adjust_room: + case BPF_FUNC_skb_change_head: + case BPF_FUNC_skb_change_proto: + case BPF_FUNC_skb_change_tail: + case BPF_FUNC_skb_pull_data: + case BPF_FUNC_skb_store_bytes: + case BPF_FUNC_skb_vlan_pop: + case BPF_FUNC_skb_vlan_push: + case BPF_FUNC_store_hdr_opt: + case BPF_FUNC_xdp_adjust_head: + case BPF_FUNC_xdp_adjust_meta: + case BPF_FUNC_xdp_adjust_tail: return true; - - return false; + default: + return false; + } } const struct bpf_func_proto bpf_event_output_data_proto __weak; From 79751e9227a5910c0e5a2c7186877d91821d957d Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:45 +0800 Subject: [PATCH 035/129] bpf: track changes_pkt_data property for global functions commit 51081a3f25c742da5a659d7fc6fd77ebfdd555be upstream. When processing calls to certain helpers, verifier invalidates all packet pointers in a current state. For example, consider the following program: __attribute__((__noinline__)) long skb_pull_data(struct __sk_buff *sk, __u32 len) { return bpf_skb_pull_data(sk, len); } SEC("tc") int test_invalidate_checks(struct __sk_buff *sk) { int *p = (void *)(long)sk->data; if ((void *)(p + 1) > (void *)(long)sk->data_end) return TCX_DROP; skb_pull_data(sk, 0); *p = 42; return TCX_PASS; } After a call to bpf_skb_pull_data() the pointer 'p' can't be used safely. See function filter.c:bpf_helper_changes_pkt_data() for a list of such helpers. At the moment verifier invalidates packet pointers when processing helper function calls, and does not traverse global sub-programs when processing calls to global sub-programs. This means that calls to helpers done from global sub-programs do not invalidate pointers in the caller state. E.g. the program above is unsafe, but is not rejected by verifier. This commit fixes the omission by computing field bpf_subprog_info->changes_pkt_data for each sub-program before main verification pass. changes_pkt_data should be set if: - subprogram calls helper for which bpf_helper_changes_pkt_data returns true; - subprogram calls a global function, for which bpf_subprog_info->changes_pkt_data should be set. The verifier.c:check_cfg() pass is modified to compute this information. The commit relies on depth first instruction traversal done by check_cfg() and absence of recursive function calls: - check_cfg() would eventually visit every call to subprogram S in a state when S is fully explored; - when S is fully explored: - every direct helper call within S is explored (and thus changes_pkt_data is set if needed); - every call to subprogram S1 called by S was visited with S1 fully explored (and thus S inherits changes_pkt_data from S1). The downside of such approach is that dead code elimination is not taken into account: if a helper call inside global function is dead because of current configuration, verifier would conservatively assume that the call occurs for the purpose of the changes_pkt_data computation. Reported-by: Nick Zavaritsky Closes: https://lore.kernel.org/bpf/0498CA22-5779-4767-9C0C-A9515CEA711F@gmail.com/ Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov [shung-hsi.yu: do not use bitfield in "struct bpf_subprog_info" because commit 406a6fa44bfb ("bpf: use bitfields for simple per-subprog bool flags") is not present and minor context difference in check_func_call() because commit 491dd8edecbc ("bpf: Emit global subprog name in verifier logs") is not present. ] Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 92919d52f7e1..32e89758176b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -573,6 +573,7 @@ struct bpf_subprog_info { bool tail_call_reachable; bool has_ld_abs; bool is_async_cb; + bool changes_pkt_data; }; struct bpf_verifier_env; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 899fa7dcd417..3ba6c13bb221 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9364,6 +9364,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (env->log.level & BPF_LOG_LEVEL) verbose(env, "Func#%d is global and valid. Skipping.\n", subprog); + if (env->subprog_info[subprog].changes_pkt_data) + clear_all_pkt_pointers(env); clear_caller_saved_regs(env, caller->regs); /* All global functions return a 64-bit SCALAR_VALUE */ @@ -15114,6 +15116,29 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; } +static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *subprog; + + subprog = find_containing_subprog(env, off); + subprog->changes_pkt_data = true; +} + +/* 't' is an index of a call-site. + * 'w' is a callee entry point. + * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. + * Rely on DFS traversal order and absence of recursive calls to guarantee that + * callee's change_pkt_data marks would be correct at that moment. + */ +static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) +{ + struct bpf_subprog_info *caller, *callee; + + caller = find_containing_subprog(env, t); + callee = find_containing_subprog(env, w); + caller->changes_pkt_data |= callee->changes_pkt_data; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -15247,6 +15272,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, bool visit_callee) { int ret, insn_sz; + int w; insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); @@ -15258,8 +15284,10 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, mark_jmp_point(env, t + insn_sz); if (visit_callee) { + w = t + insns[t].imm + 1; mark_prune_point(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); + merge_callee_effects(env, t, w); + ret = push_insn(t, w, BRANCH, env); } return ret; } @@ -15311,6 +15339,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) mark_prune_point(env, t); mark_jmp_point(env, t); } + if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm)) + mark_subprog_changes_pkt_data(env, t); if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; From 85a30a46396a99d4ee864c871ec14533fcc769cc Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:46 +0800 Subject: [PATCH 036/129] selftests/bpf: test for changing packet data from global functions commit 3f23ee5590d9605dbde9a5e1d4b97637a4803329 upstream. Check if verifier is aware of packet pointers invalidation done in global functions. Based on a test shared by Nick Zavaritsky in [0]. [0] https://lore.kernel.org/bpf/0498CA22-5779-4767-9C0C-A9515CEA711F@gmail.com/ Suggested-by: Nick Zavaritsky Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-5-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- .../selftests/bpf/progs/verifier_sock.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index ee76b51005ab..e85f0f1deac7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -977,4 +977,32 @@ l1_%=: r0 = *(u8*)(r7 + 0); \ : __clobber_all); } +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; From 7197fc4acdf238ec8ad06de5a8235df0c1f9c7d7 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:47 +0800 Subject: [PATCH 037/129] bpf: check changes_pkt_data property for extension programs commit 81f6d0530ba031b5f038a091619bf2ff29568852 upstream. When processing calls to global sub-programs, verifier decides whether to invalidate all packet pointers in current state depending on the changes_pkt_data property of the global sub-program. Because of this, an extension program replacing a global sub-program must be compatible with changes_pkt_data property of the sub-program being replaced. This commit: - adds changes_pkt_data flag to struct bpf_prog_aux: - this flag is set in check_cfg() for main sub-program; - in jit_subprogs() for other sub-programs; - modifies bpf_check_attach_btf_id() to check changes_pkt_data flag; - moves call to check_attach_btf_id() after the call to check_cfg(), because it needs changes_pkt_data flag to be set: bpf_check: ... ... - check_attach_btf_id resolve_pseudo_ldimm64 resolve_pseudo_ldimm64 --> bpf_prog_is_offloaded bpf_prog_is_offloaded check_cfg check_cfg + check_attach_btf_id ... ... The following fields are set by check_attach_btf_id(): - env->ops - prog->aux->attach_btf_trace - prog->aux->attach_func_name - prog->aux->attach_func_proto - prog->aux->dst_trampoline - prog->aux->mod - prog->aux->saved_dst_attach_type - prog->aux->saved_dst_prog_type - prog->expected_attach_type Neither of these fields are used by resolve_pseudo_ldimm64() or bpf_prog_offload_verifier_prep() (for netronome and netdevsim drivers), so the reordering is safe. Suggested-by: Alexei Starovoitov Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-6-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov [ shung-hsi.yu: adapt to missing fields in "struct bpf_prog_aux". Context difference in jit_subprogs() because BPF Exception is not supported. Context difference in bpf_check() because commit 5b5f51bff1b6 "bpf: no_caller_saved_registers attribute for helper calls" is not present. ] Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf.h | 1 + kernel/bpf/verifier.c | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 035e627f94f6..17de12a98f85 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1430,6 +1430,7 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool changes_pkt_data; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3ba6c13bb221..9fe9f72a6194 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15462,6 +15462,7 @@ static int check_cfg(struct bpf_verifier_env *env) } } ret = 0; /* cfg looks good */ + env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; err_free: kvfree(insn_state); @@ -18622,6 +18623,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) } func[i]->aux->num_exentries = num_exentries; func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; + func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -19934,6 +19936,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, "Extension programs should be JITed\n"); return -EINVAL; } + if (prog->aux->changes_pkt_data && + !aux->func[subprog]->aux->changes_pkt_data) { + bpf_log(log, + "Extension program changes packet data, while original does not\n"); + return -EINVAL; + } } if (!tgt_prog->jited) { bpf_log(log, "Can attach to only JITed progs\n"); @@ -20393,10 +20401,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; - ret = check_attach_btf_id(env); - if (ret) - goto skip_full_check; - ret = resolve_pseudo_ldimm64(env); if (ret < 0) goto skip_full_check; @@ -20411,6 +20415,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; + ret = check_attach_btf_id(env); + if (ret) + goto skip_full_check; + ret = do_check_subprogs(env); ret = ret ?: do_check_main(env); From 70234fbfa266266723fe58d36ce5f9aa599bdcd8 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:48 +0800 Subject: [PATCH 038/129] selftests/bpf: freplace tests for tracking of changes_packet_data commit 89ff40890d8f12a7d7e93fb602cc27562f3834f0 upstream. Try different combinations of global functions replacement: - replace function that changes packet data with one that doesn't; - replace function that changes packet data with one that does; - replace function that doesn't change packet data with one that does; - replace function that doesn't change packet data with one that doesn't; Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-7-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- .../bpf/prog_tests/changes_pkt_data.c | 76 +++++++++++++++++++ .../selftests/bpf/progs/changes_pkt_data.c | 26 +++++++ .../bpf/progs/changes_pkt_data_freplace.c | 18 +++++ 3 files changed, 120 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c create mode 100644 tools/testing/selftests/bpf/progs/changes_pkt_data.c create mode 100644 tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 000000000000..c0c7202f6c5c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, const char *freplace_prog_name, bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, freplace_prog_name); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main->progs.dummy), + main_prog_name); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + */ +void test_changes_pkt_data_freplace(void) +{ + if (test__start_subtest("changes_with_changes")) + test_aux("changes_pkt_data", "changes_pkt_data", true); + if (test__start_subtest("changes_with_doesnt_change")) + test_aux("changes_pkt_data", "does_not_change_pkt_data", true); + if (test__start_subtest("doesnt_change_with_changes")) + test_aux("does_not_change_pkt_data", "changes_pkt_data", false); + if (test__start_subtest("doesnt_change_with_doesnt_change")) + test_aux("does_not_change_pkt_data", "does_not_change_pkt_data", true); +} diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 000000000000..f87da8e9d6b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +__noinline +long changes_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return 0; +} + +SEC("tc") +int dummy(struct __sk_buff *sk) +{ + changes_pkt_data(sk, 0); + does_not_change_pkt_data(sk, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 000000000000..0e525beb8603 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; From f1692ee23dcaaddc24ba407b269707ee5df1301f Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:49 +0800 Subject: [PATCH 039/129] bpf: consider that tail calls invalidate packet pointers commit 1a4607ffba35bf2a630aab299e34dd3f6e658d70 upstream. Tail-called programs could execute any of the helpers that invalidate packet pointers. Hence, conservatively assume that each tail call invalidates packet pointers. Making the change in bpf_helper_changes_pkt_data() automatically makes use of check_cfg() logic that computes 'changes_pkt_data' effect for global sub-programs, such that the following program could be rejected: int tail_call(struct __sk_buff *sk) { bpf_tail_call_static(sk, &jmp_table, 0); return 0; } SEC("tc") int not_safe(struct __sk_buff *sk) { int *p = (void *)(long)sk->data; ... make p valid ... tail_call(sk); *p = 42; /* this is unsafe */ ... } The tc_bpf2bpf.c:subprog_tc() needs change: mark it as a function that can invalidate packet pointers. Otherwise, it can't be freplaced with tailcall_freplace.c:entry_freplace() that does a tail call. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-8-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov [ shung-hsi.yu: drop changes to tools/testing/selftests/bpf/progs/tc_bpf2bpf.c because it is not present. ] Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 1127ab198409..c4c27e03279c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7893,6 +7893,8 @@ bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id) case BPF_FUNC_xdp_adjust_head: case BPF_FUNC_xdp_adjust_meta: case BPF_FUNC_xdp_adjust_tail: + /* tail-called program could call any of the above */ + case BPF_FUNC_tail_call: return true; default: return false; From b7c0d2d4ef13e0ce1c5e1d0e01eeb586bea709f4 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:50 +0800 Subject: [PATCH 040/129] selftests/bpf: validate that tail call invalidates packet pointers commit d9706b56e13b7916461ca6b4b731e169ed44ed09 upstream. Add a test case with a tail call done from a global sub-program. Such tails calls should be considered as invalidating packet pointers. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241210041100.1898468-9-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- .../selftests/bpf/progs/verifier_sock.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index e85f0f1deac7..3c8f6646e33d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -1005,4 +1012,25 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) return TCX_PASS; } +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; From 3a467d938da2830009f2bea21b497377694a32a0 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:51 +0800 Subject: [PATCH 041/129] bpf: fix null dereference when computing changes_pkt_data of prog w/o subprogs commit ac6542ad92759cda383ad62b4e4cbfc28136abc1 upstream. bpf_prog_aux->func field might be NULL if program does not have subprograms except for main sub-program. The fixed commit does bpf_prog_aux->func access unconditionally, which might lead to null pointer dereference. The bug could be triggered by replacing the following BPF program: SEC("tc") int main_changes(struct __sk_buff *sk) { bpf_skb_pull_data(sk, 0); return 0; } With the following BPF program: SEC("freplace") long changes_pkt_data(struct __sk_buff *sk) { return bpf_skb_pull_data(sk, 0); } bpf_prog_aux instance itself represents the main sub-program, use this property to fix the bug. Fixes: 81f6d0530ba0 ("bpf: check changes_pkt_data property for extension programs") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202412111822.qGw6tOyB-lkp@intel.com/ Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241212070711.427443-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9fe9f72a6194..756e179a1efe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19908,6 +19908,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, } if (tgt_prog) { struct bpf_prog_aux *aux = tgt_prog->aux; + bool tgt_changes_pkt_data; if (bpf_prog_is_dev_bound(prog->aux) && !bpf_prog_dev_bound_match(prog, tgt_prog)) { @@ -19936,8 +19937,10 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, "Extension programs should be JITed\n"); return -EINVAL; } - if (prog->aux->changes_pkt_data && - !aux->func[subprog]->aux->changes_pkt_data) { + tgt_changes_pkt_data = aux->func + ? aux->func[subprog]->aux->changes_pkt_data + : aux->changes_pkt_data; + if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) { bpf_log(log, "Extension program changes packet data, while original does not\n"); return -EINVAL; From 47e24c86cb20515d1da56aadf063775232e22336 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 30 Apr 2025 16:19:52 +0800 Subject: [PATCH 042/129] selftests/bpf: extend changes_pkt_data with cases w/o subprograms commit 04789af756a4a43e72986185f66f148e65b32fed upstream. Extend changes_pkt_data tests with test cases freplacing the main program that does not have subprograms. Try four combinations when both main program and replacement do and do not change packet data. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241212070711.427443-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Shung-Hsi Yu Signed-off-by: Greg Kroah-Hartman --- .../bpf/prog_tests/changes_pkt_data.c | 55 +++++++++++++++---- .../selftests/bpf/progs/changes_pkt_data.c | 27 ++++++--- .../bpf/progs/changes_pkt_data_freplace.c | 6 +- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c index c0c7202f6c5c..7526de379081 100644 --- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -10,10 +10,14 @@ static void print_verifier_log(const char *log) fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); } -static void test_aux(const char *main_prog_name, const char *freplace_prog_name, bool expect_load) +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) { struct changes_pkt_data_freplace *freplace = NULL; struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; LIBBPF_OPTS(bpf_object_open_opts, opts); struct changes_pkt_data *main = NULL; char log[16*1024]; @@ -26,6 +30,10 @@ static void test_aux(const char *main_prog_name, const char *freplace_prog_name, main = changes_pkt_data__open_opts(&opts); if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); err = changes_pkt_data__load(main); print_verifier_log(log); if (!ASSERT_OK(err, "changes_pkt_data__load")) @@ -33,14 +41,14 @@ static void test_aux(const char *main_prog_name, const char *freplace_prog_name, freplace = changes_pkt_data_freplace__open_opts(&opts); if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) goto out; - freplace_prog = bpf_object__find_program_by_name(freplace->obj, freplace_prog_name); + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) goto out; bpf_program__set_autoload(freplace_prog, true); bpf_program__set_autoattach(freplace_prog, true); bpf_program__set_attach_target(freplace_prog, - bpf_program__fd(main->progs.dummy), - main_prog_name); + bpf_program__fd(main_prog), + to_be_replaced); err = changes_pkt_data_freplace__load(freplace); print_verifier_log(log); if (expect_load) { @@ -62,15 +70,38 @@ out: * that either do or do not. It is only ok to freplace subprograms * that do not change packet data with those that do not as well. * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. */ void test_changes_pkt_data_freplace(void) { - if (test__start_subtest("changes_with_changes")) - test_aux("changes_pkt_data", "changes_pkt_data", true); - if (test__start_subtest("changes_with_doesnt_change")) - test_aux("changes_pkt_data", "does_not_change_pkt_data", true); - if (test__start_subtest("doesnt_change_with_changes")) - test_aux("does_not_change_pkt_data", "changes_pkt_data", false); - if (test__start_subtest("doesnt_change_with_doesnt_change")) - test_aux("does_not_change_pkt_data", "does_not_change_pkt_data", true); + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } } diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c index f87da8e9d6b3..43cada48b28a 100644 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -4,22 +4,35 @@ #include __noinline -long changes_pkt_data(struct __sk_buff *sk, __u32 len) +long changes_pkt_data(struct __sk_buff *sk) { - return bpf_skb_pull_data(sk, len); + return bpf_skb_pull_data(sk, 0); } __noinline __weak -long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +long does_not_change_pkt_data(struct __sk_buff *sk) { return 0; } -SEC("tc") -int dummy(struct __sk_buff *sk) +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) { - changes_pkt_data(sk, 0); - does_not_change_pkt_data(sk, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c index 0e525beb8603..f9a622705f1b 100644 --- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -4,13 +4,13 @@ #include SEC("?freplace") -long changes_pkt_data(struct __sk_buff *sk, __u32 len) +long changes_pkt_data(struct __sk_buff *sk) { - return bpf_skb_pull_data(sk, len); + return bpf_skb_pull_data(sk, 0); } SEC("?freplace") -long does_not_change_pkt_data(struct __sk_buff *sk, __u32 len) +long does_not_change_pkt_data(struct __sk_buff *sk) { return 0; } From 28cfd94980bb69033718a9d684a44ccc13c32821 Mon Sep 17 00:00:00 2001 From: Ryan Matthews Date: Sun, 4 May 2025 15:13:55 -0400 Subject: [PATCH 043/129] Revert "PCI: imx6: Skip controller_id generation logic for i.MX7D" This reverts commit 2a12efc567a270a155e3b886258297abd79cdea0 which is commit f068ffdd034c93f0c768acdc87d4d2d7023c1379 upstream. This is a backport mistake. Deleting "IMX7D" here skips more than just controller_id logic. It skips reset assignments too, which causes: imx6q-pcie 33800000.pcie: PCIe PLL lock timeout In my case, in addition to broken PCIe, kernel boot hangs entirely. This isn't a problem upstream because before this, they moved the rest of the code out of the switch case. Signed-off-by: Ryan Matthews Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-imx6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 822a750b064b..20c8f2cba453 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1281,6 +1281,7 @@ static int imx6_pcie_probe(struct platform_device *pdev) switch (imx6_pcie->drvdata->variant) { case IMX8MQ: case IMX8MQ_EP: + case IMX7D: if (dbi_base->start == IMX8MQ_PCIE2_BASE_ADDR) imx6_pcie->controller_id = 1; From 46acbfbcc0e26efee6e3cfcc00ed2012c877f6ef Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Sun, 4 May 2025 15:13:56 -0400 Subject: [PATCH 044/129] PCI: imx6: Skip controller_id generation logic for i.MX7D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f068ffdd034c93f0c768acdc87d4d2d7023c1379 ] The i.MX7D only has one PCIe controller, so controller_id should always be 0. The previous code is incorrect although yielding the correct result. Fix by removing "IMX7D" from the switch case branch. Fixes: 2d8ed461dbc9 ("PCI: imx6: Add support for i.MX8MQ") Link: https://lore.kernel.org/r/20241126075702.4099164-5-hongxing.zhu@nxp.com Signed-off-by: Richard Zhu Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Reviewed-by: Frank Li [Because this switch case does more than just controller_id logic, move the "IMX7D" case label instead of removing it entirely.] Signed-off-by: Ryan Matthews Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-imx6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 20c8f2cba453..cedfbd425863 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1281,10 +1281,10 @@ static int imx6_pcie_probe(struct platform_device *pdev) switch (imx6_pcie->drvdata->variant) { case IMX8MQ: case IMX8MQ_EP: - case IMX7D: if (dbi_base->start == IMX8MQ_PCIE2_BASE_ADDR) imx6_pcie->controller_id = 1; - + fallthrough; + case IMX7D: imx6_pcie->pciephy_reset = devm_reset_control_get_exclusive(dev, "pciephy"); if (IS_ERR(imx6_pcie->pciephy_reset)) { From a1723e9c53fe6431415be19302a56543daf503f5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 24 Jan 2025 17:18:33 -0800 Subject: [PATCH 045/129] KVM: x86: Load DR6 with guest value only before entering .vcpu_run() loop commit c2fee09fc167c74a64adb08656cb993ea475197e upstream. Move the conditional loading of hardware DR6 with the guest's DR6 value out of the core .vcpu_run() loop to fix a bug where KVM can load hardware with a stale vcpu->arch.dr6. When the guest accesses a DR and host userspace isn't debugging the guest, KVM disables DR interception and loads the guest's values into hardware on VM-Enter and saves them on VM-Exit. This allows the guest to access DRs at will, e.g. so that a sequence of DR accesses to configure a breakpoint only generates one VM-Exit. For DR0-DR3, the logic/behavior is identical between VMX and SVM, and also identical between KVM_DEBUGREG_BP_ENABLED (userspace debugging the guest) and KVM_DEBUGREG_WONT_EXIT (guest using DRs), and so KVM handles loading DR0-DR3 in common code, _outside_ of the core kvm_x86_ops.vcpu_run() loop. But for DR6, the guest's value doesn't need to be loaded into hardware for KVM_DEBUGREG_BP_ENABLED, and SVM provides a dedicated VMCB field whereas VMX requires software to manually load the guest value, and so loading the guest's value into DR6 is handled by {svm,vmx}_vcpu_run(), i.e. is done _inside_ the core run loop. Unfortunately, saving the guest values on VM-Exit is initiated by common x86, again outside of the core run loop. If the guest modifies DR6 (in hardware, when DR interception is disabled), and then the next VM-Exit is a fastpath VM-Exit, KVM will reload hardware DR6 with vcpu->arch.dr6 and clobber the guest's actual value. The bug shows up primarily with nested VMX because KVM handles the VMX preemption timer in the fastpath, and the window between hardware DR6 being modified (in guest context) and DR6 being read by guest software is orders of magnitude larger in a nested setup. E.g. in non-nested, the VMX preemption timer would need to fire precisely between #DB injection and the #DB handler's read of DR6, whereas with a KVM-on-KVM setup, the window where hardware DR6 is "dirty" extends all the way from L1 writing DR6 to VMRESUME (in L1). L1's view: ========== CPU 0/KVM-7289 [023] d.... 2925.640961: kvm_entry: vcpu 0 A: L1 Writes DR6 CPU 0/KVM-7289 [023] d.... 2925.640963: : Set DRs, DR6 = 0xffff0ff1 B: CPU 0/KVM-7289 [023] d.... 2925.640967: kvm_exit: vcpu 0 reason EXTERNAL_INTERRUPT intr_info 0x800000ec D: L1 reads DR6, arch.dr6 = 0 CPU 0/KVM-7289 [023] d.... 2925.640969: : Sync DRs, DR6 = 0xffff0ff0 CPU 0/KVM-7289 [023] d.... 2925.640976: kvm_entry: vcpu 0 L2 reads DR6, L1 disables DR interception CPU 0/KVM-7289 [023] d.... 2925.640980: kvm_exit: vcpu 0 reason DR_ACCESS info1 0x0000000000000216 CPU 0/KVM-7289 [023] d.... 2925.640983: kvm_entry: vcpu 0 CPU 0/KVM-7289 [023] d.... 2925.640983: : Set DRs, DR6 = 0xffff0ff0 L2 detects failure CPU 0/KVM-7289 [023] d.... 2925.640987: kvm_exit: vcpu 0 reason HLT L1 reads DR6 (confirms failure) CPU 0/KVM-7289 [023] d.... 2925.640990: : Sync DRs, DR6 = 0xffff0ff0 L0's view: ========== L2 reads DR6, arch.dr6 = 0 CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216 CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216 L2 => L1 nested VM-Exit CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit_inject: reason: DR_ACCESS ext_inf1: 0x0000000000000216 CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_entry: vcpu 23 CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_exit: vcpu 23 reason VMREAD CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_entry: vcpu 23 CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason VMREAD CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_entry: vcpu 23 L1 writes DR7, L0 disables DR interception CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000007 CPU 23/KVM-5046 [001] d.... 3410.005613: kvm_entry: vcpu 23 L0 writes DR6 = 0 (arch.dr6) CPU 23/KVM-5046 [001] d.... 3410.005613: : Set DRs, DR6 = 0xffff0ff0 A: B: CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_exit: vcpu 23 reason PREEMPTION_TIMER CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_entry: vcpu 23 C: L0 writes DR6 = 0 (arch.dr6) CPU 23/KVM-5046 [001] d.... 3410.005614: : Set DRs, DR6 = 0xffff0ff0 L1 => L2 nested VM-Enter CPU 23/KVM-5046 [001] d.... 3410.005616: kvm_exit: vcpu 23 reason VMRESUME L0 reads DR6, arch.dr6 = 0 Reported-by: John Stultz Closes: https://lkml.kernel.org/r/CANDhNCq5_F3HfFYABqFGCA1bPd_%2BxgNj-iDQhH4tDk%2Bwi8iZZg%40mail.gmail.com Fixes: 375e28ffc0cf ("KVM: X86: Set host DR6 only on VMX and for KVM_DEBUGREG_WONT_EXIT") Fixes: d67668e9dd76 ("KVM: x86, SVM: isolate vcpu->arch.dr6 from vmcb->save.dr6") Cc: stable@vger.kernel.org Cc: Jim Mattson Tested-by: John Stultz Link: https://lore.kernel.org/r/20250125011833.3644371-1-seanjc@google.com Signed-off-by: Sean Christopherson [jth: Handled conflicts with kvm_x86_ops reshuffle] Signed-off-by: James Houghton Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 13 ++++++------- arch/x86/kvm/vmx/vmx.c | 11 +++++++---- arch/x86/kvm/x86.c | 3 +++ 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 9b419f0de713..e59ded976166 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -48,6 +48,7 @@ KVM_X86_OP(set_idt) KVM_X86_OP(get_gdt) KVM_X86_OP(set_gdt) KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr6) KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 39672561c6be..5dfb8cc9616e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1595,6 +1595,7 @@ struct kvm_x86_ops { void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1d06b8fc15a8..29c1be65cb71 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2014,11 +2014,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid = sd->next_asid++; } -static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) { - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = to_svm(vcpu)->vmcb; - if (svm->vcpu.arch.guest_state_protected) + if (vcpu->arch.guest_state_protected) return; if (unlikely(value != vmcb->save.dr6)) { @@ -4220,10 +4220,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * Run with all-zero DR6 unless needed, so that we can get the exact cause * of a #DB. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - svm_set_dr6(svm, vcpu->arch.dr6); - else - svm_set_dr6(svm, DR6_ACTIVE_LOW); + if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); @@ -5002,6 +5000,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 52098844290a..e5a2c230110e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5617,6 +5617,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } +static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ + lockdep_assert_irqs_disabled(); + set_debugreg(vcpu->arch.dr6, 6); +} + static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -7356,10 +7362,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } - /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - set_debugreg(vcpu->arch.dr6, 6); - /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug @@ -8292,6 +8294,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f67fe8a65820..1eeb01afa40b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10772,6 +10772,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + static_call(kvm_x86_set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } From a6dbcf69d4c4bd101a2e947ee235ef074bcef0cd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 28 Feb 2025 15:46:30 +0000 Subject: [PATCH 046/129] iommu: Handle race with default domain setup commit b46064a18810bad3aea089a79993ca5ea7a3d2b2 upstream. It turns out that deferred default domain creation leaves a subtle race window during iommu_device_register() wherein a client driver may asynchronously probe in parallel and get as far as performing DMA API operations with dma-direct, only to be switched to iommu-dma underfoot once the default domain attachment finally happens, with obviously disastrous consequences. Even the wonky of_iommu_configure() path is at risk, since iommu_fwspec_init() will no longer defer client probe as the instance ops are (necessarily) already registered, and the "replay" iommu_probe_device() call can see dev->iommu_group already set and so think there's nothing to do either. Fortunately we already have the right tool in the right place in the form of iommu_device_use_default_domain(), which just needs to ensure that said default domain is actually ready to *be* used. Deferring the client probe shouldn't have too much impact, given that this only happens while the IOMMU driver is probing, and thus due to kick the deferred probe list again once it finishes. Reported-by: Charan Teja Kalla Fixes: 98ac73f99bc4 ("iommu: Require a default_domain for all iommu drivers") Reviewed-by: Jason Gunthorpe Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e88b94c9b575034a2c98a48b3d383654cbda7902.1740753261.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Robin Murphy --- drivers/iommu/iommu.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3f1029c0825e..f2b3a4e2e54f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -566,6 +566,18 @@ int iommu_probe_device(struct device *dev) mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, NULL); mutex_unlock(&iommu_probe_device_lock); + + /* + * The dma_configure replay paths need bus_iommu_probe() to + * finish before they can call arch_setup_dma_ops() + */ + if (IS_ENABLED(CONFIG_IOMMU_DMA) && !ret && dev->iommu_group) { + mutex_lock(&dev->iommu_group->mutex); + if (!dev->iommu_group->default_domain && + !dev_iommu_ops(dev)->set_platform_dma_ops) + ret = -EPROBE_DEFER; + mutex_unlock(&dev->iommu_group->mutex); + } if (ret) return ret; @@ -3149,6 +3161,12 @@ int iommu_device_use_default_domain(struct device *dev) return 0; mutex_lock(&group->mutex); + /* We may race against bus_iommu_probe() finalising groups here */ + if (IS_ENABLED(CONFIG_IOMMU_DMA) && !group->default_domain && + !dev_iommu_ops(dev)->set_platform_dma_ops) { + ret = -EPROBE_DEFER; + goto unlock_out; + } if (group->owner_cnt) { if (group->owner || !iommu_is_default_domain(group) || !xa_empty(&group->pasid_array)) { From 8890eeae5c82e2de5f06240bad4c9bf1ce0ca641 Mon Sep 17 00:00:00 2001 From: Sheetal Date: Fri, 4 Apr 2025 10:59:53 +0000 Subject: [PATCH 047/129] ASoC: soc-pcm: Fix hw_params() and DAPM widget sequence [ Upstream commit 9aff2e8df240e84a36f2607f98a0a9924a24e65d ] Issue: When multiple audio streams share a common BE DAI, the BE DAI widget can be powered up before its hardware parameters are configured. This incorrect sequence leads to intermittent pcm_write errors. For example, the below Tegra use-case throws an error: aplay(2 streams) -> AMX(mux) -> ADX(demux) -> arecord(2 streams), here, 'AMX TX' and 'ADX RX' are common BE DAIs. For above usecase when failure happens below sequence is observed: aplay(1) FE open() - BE DAI callbacks added to the list - BE DAI state = SND_SOC_DPCM_STATE_OPEN aplay(2) FE open() - BE DAI callbacks are not added to the list as the state is already SND_SOC_DPCM_STATE_OPEN during aplay(1) FE open(). aplay(2) FE hw_params() - BE DAI hw_params() callback ignored aplay(2) FE prepare() - Widget is powered ON without BE DAI hw_params() call aplay(1) FE hw_params() - BE DAI hw_params() is now called Fix: Add BE DAIs in the list if its state is either SND_SOC_DPCM_STATE_OPEN or SND_SOC_DPCM_STATE_HW_PARAMS as well. It ensures the widget is powered ON after BE DAI hw_params() callback. Fixes: 0c25db3f7621 ("ASoC: soc-pcm: Don't reconnect an already active BE") Signed-off-by: Sheetal Link: https://patch.msgid.link/20250404105953.2784819-1-sheetal@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 60248a6820aa..30e93f9aad76 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1534,10 +1534,13 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* * Filter for systems with 'component_chaining' enabled. * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. + * already active BE on such systems and ensures the BE DAI + * widget is powered ON after hw_params() BE DAI callback. */ if (fe->card->component_chaining && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) continue; From 9a8d4d7072d4df108479b1adc4b0840e96f6f61d Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Mon, 10 Mar 2025 07:44:10 -0500 Subject: [PATCH 048/129] book3s64/radix : Align section vmemmap start address to PAGE_SIZE [ Upstream commit 9cf7e13fecbab0894f6986fc6986ab2eba8de52e ] A vmemmap altmap is a device-provided region used to provide backing storage for struct pages. For each namespace, the altmap should belong to that same namespace. If the namespaces are created unaligned, there is a chance that the section vmemmap start address could also be unaligned. If the section vmemmap start address is unaligned, the altmap page allocated from the current namespace might be used by the previous namespace also. During the free operation, since the altmap is shared between two namespaces, the previous namespace may detect that the page does not belong to its altmap and incorrectly assume that the page is a normal page. It then attempts to free the normal page, which leads to a kernel crash. Kernel attempted to read user page (18) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000018 Faulting instruction address: 0xc000000000530c7c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries CPU: 32 PID: 2104 Comm: ndctl Kdump: loaded Tainted: G W NIP: c000000000530c7c LR: c000000000530e00 CTR: 0000000000007ffe REGS: c000000015e57040 TRAP: 0300 Tainted: G W MSR: 800000000280b033 CR: 84482404 CFAR: c000000000530dfc DAR: 0000000000000018 DSISR: 40000000 IRQMASK: 0 GPR00: c000000000530e00 c000000015e572e0 c000000002c5cb00 c00c000101008040 GPR04: 0000000000000000 0000000000000007 0000000000000001 000000000000001f GPR08: 0000000000000005 0000000000000000 0000000000000018 0000000000002000 GPR12: c0000000001d2fb0 c0000060de6b0080 0000000000000000 c0000060dbf90020 GPR16: c00c000101008000 0000000000000001 0000000000000000 c000000125b20f00 GPR20: 0000000000000001 0000000000000000 ffffffffffffffff c00c000101007fff GPR24: 0000000000000001 0000000000000000 0000000000000000 0000000000000000 GPR28: 0000000004040201 0000000000000001 0000000000000000 c00c000101008040 NIP [c000000000530c7c] get_pfnblock_flags_mask+0x7c/0xd0 LR [c000000000530e00] free_unref_page_prepare+0x130/0x4f0 Call Trace: free_unref_page+0x50/0x1e0 free_reserved_page+0x40/0x68 free_vmemmap_pages+0x98/0xe0 remove_pte_table+0x164/0x1e8 remove_pmd_table+0x204/0x2c8 remove_pud_table+0x1c4/0x288 remove_pagetable+0x1c8/0x310 vmemmap_free+0x24/0x50 section_deactivate+0x28c/0x2a0 __remove_pages+0x84/0x110 arch_remove_memory+0x38/0x60 memunmap_pages+0x18c/0x3d0 devm_action_release+0x30/0x50 release_nodes+0x68/0x140 devres_release_group+0x100/0x190 dax_pmem_compat_release+0x44/0x80 [dax_pmem_compat] device_for_each_child+0x8c/0x100 [dax_pmem_compat_remove+0x2c/0x50 [dax_pmem_compat] nvdimm_bus_remove+0x78/0x140 [libnvdimm] device_remove+0x70/0xd0 Another issue is that if there is no altmap, a PMD-sized vmemmap page will be allocated from RAM, regardless of the alignment of the section start address. If the section start address is not aligned to the PMD size, a VM_BUG_ON will be triggered when setting the PMD-sized page to page table. In this patch, we are aligning the section vmemmap start address to PAGE_SIZE. After alignment, the start address will not be part of the current namespace, and a normal page will be allocated for the vmemmap mapping of the current section. For the remaining sections, altmaps will be allocated. During the free operation, the normal page will be correctly freed. In the same way, a PMD_SIZE vmemmap page will be allocated only if the section start address is PMD_SIZE-aligned; otherwise, it will fall back to a PAGE-sized vmemmap allocation. Without this patch ================== NS1 start NS2 start _________________________________________________________ | NS1 | NS2 | --------------------------------------------------------- | Altmap| Altmap | .....|Altmap| Altmap | ........... | NS1 | NS1 | | NS2 | NS2 | In the above scenario, NS1 and NS2 are two namespaces. The vmemmap for NS1 comes from Altmap NS1, which belongs to NS1, and the vmemmap for NS2 comes from Altmap NS2, which belongs to NS2. The vmemmap start for NS2 is not aligned, so Altmap NS2 is shared by both NS1 and NS2. During the free operation in NS1, Altmap NS2 is not part of NS1's altmap, causing it to attempt to free an invalid page. With this patch =============== NS1 start NS2 start _________________________________________________________ | NS1 | NS2 | --------------------------------------------------------- | Altmap| Altmap | .....| Normal | Altmap | Altmap |....... | NS1 | NS1 | | Page | NS2 | NS2 | If the vmemmap start for NS2 is not aligned then we are allocating a normal page. NS1 and NS2 vmemmap will be freed correctly. Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a different vmemmap handling function") Co-developed-by: Ritesh Harjani (IBM) Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Donet Tom Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8f98ec2b442977c618f7256cec88eb17dde3f2b9.1741609795.git.donettom@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/book3s64/radix_pgtable.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index c6a4ac766b2b..28460e334080 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1056,6 +1056,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd_t *pmd; pte_t *pte; + /* + * Make sure we align the start vmemmap addr so that we calculate + * the correct start_pfn in altmap boundary check to decided whether + * we should use altmap or RAM based backing memory allocation. Also + * the address need to be aligned for set_pte operation. + + * If the start addr is already PMD_SIZE aligned we will try to use + * a pmd mapping. We don't want to be too aggressive here beacause + * that will cause more allocations in RAM. So only if the namespace + * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping. + */ + + start = ALIGN_DOWN(start, PAGE_SIZE); for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -1081,8 +1094,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in * in altmap block allocation failures, in which case * we fallback to RAM for vmemmap allocation. */ - if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) || - altmap_cross_boundary(altmap, addr, PMD_SIZE))) { + if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap && + altmap_cross_boundary(altmap, addr, PMD_SIZE))) { /* * make sure we don't create altmap mappings * covering things outside the device. From 27a1efe21f9e4fa6016190c71a2a38d532a847d6 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 1 Apr 2025 06:12:18 +0530 Subject: [PATCH 049/129] powerpc/boot: Check for ld-option support [ Upstream commit b2accfe7ca5bc9f9af28e603b79bdd5ad8df5c0b ] Commit 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") enabled support to add linker option "--no-warn-rwx-segments", if the version is greater than 2.39. Similar build warning were reported recently from linker version 2.35.2. ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions ld: warning: arch/powerpc/boot/zImage.pseries has a LOAD segment with RWX permissions Fix the warning by checking for "--no-warn-rwx-segments" option support in linker to enable it, instead of checking for the version range. Fixes: 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") Reported-by: Venkat Rao Bagalkote Suggested-by: Christophe Leroy Tested-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/linuxppc-dev/61cf556c-4947-4bd6-af63-892fc0966dad@linux.ibm.com/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250401004218.24869-1-maddy@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/boot/wrapper | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 352d7de24018..fea9694f1047 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,10 +234,8 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if ! ld_is_lld; then - if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then - nowarn="$nowarn --no-warn-rwx-segments" - fi +if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then + nowarn="$nowarn --no-warn-rwx-segments" fi platformo=$object/"$platform".o From c5a5de348c05faa4d5bbfb43c9123e164c48be15 Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Tue, 15 Apr 2025 12:06:16 +0300 Subject: [PATCH 050/129] drm/i915/pxp: fix undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' [ Upstream commit 7e21ea8149a0e41c3666ee52cc063a6f797a7a2a ] On x86_64 with gcc version 13.3.0, I compile kernel with: make defconfig ./scripts/kconfig/merge_config.sh .config <( echo CONFIG_COMPILE_TEST=y ) make KCFLAGS="-fno-inline-functions -fno-inline-small-functions -fno-inline-functions-called-once" Then I get a linker error: ld: vmlinux.o: in function `pxp_fw_dependencies_completed': kintel_pxp.c:(.text+0x95728f): undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' This is caused by not having a intel_pxp_gsccs_is_ready_for_sessions() header stub for CONFIG_DRM_I915_PXP=n. Add it. Signed-off-by: Chen Linxuan Fixes: 99afb7cc8c44 ("drm/i915/pxp: Add ARB session creation and cleanup") Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250415090616.2649889-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit b484c1e225a6a582fc78c4d7af7b286408bb7d41) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 298ad38e6c7d..c36d956b9b82 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -25,6 +25,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -36,8 +37,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} + #endif -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); - #endif /*__INTEL_PXP_GSCCS_H__ */ From 36a9a2647810e57e704dde59abdf831380ca9102 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 21 Mar 2025 21:52:25 +0300 Subject: [PATCH 051/129] wifi: plfxlc: Remove erroneous assert in plfxlc_mac_release [ Upstream commit 0fb15ae3b0a9221be01715dac0335647c79f3362 ] plfxlc_mac_release() asserts that mac->lock is held. This assertion is incorrect, because even if it was possible, it would not be the valid behaviour. The function is used when probe fails or after the device is disconnected. In both cases mac->lock can not be held as the driver is not working with the device at the moment. All functions that use mac->lock unlock it just after it was held. There is also no need to hold mac->lock for plfxlc_mac_release() itself, as mac data is not affected, except for mac->flags, which is modified atomically. This bug leads to the following warning: ================================================================ WARNING: CPU: 0 PID: 127 at drivers/net/wireless/purelifi/plfxlc/mac.c:106 plfxlc_mac_release+0x7d/0xa0 Modules linked in: CPU: 0 PID: 127 Comm: kworker/0:2 Not tainted 6.1.124-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: usb_hub_wq hub_event RIP: 0010:plfxlc_mac_release+0x7d/0xa0 drivers/net/wireless/purelifi/plfxlc/mac.c:106 Call Trace: probe+0x941/0xbd0 drivers/net/wireless/purelifi/plfxlc/usb.c:694 usb_probe_interface+0x5c0/0xaf0 drivers/usb/core/driver.c:396 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_set_configuration+0x19dd/0x2020 drivers/usb/core/message.c:2165 usb_generic_driver_probe+0x84/0x140 drivers/usb/core/generic.c:238 usb_probe_device+0x130/0x260 drivers/usb/core/driver.c:293 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_new_device+0xbdd/0x18f0 drivers/usb/core/hub.c:2620 hub_port_connect drivers/usb/core/hub.c:5477 [inline] hub_port_connect_change drivers/usb/core/hub.c:5617 [inline] port_event drivers/usb/core/hub.c:5773 [inline] hub_event+0x2efe/0x5730 drivers/usb/core/hub.c:5855 process_one_work+0x8a9/0x11d0 kernel/workqueue.c:2292 worker_thread+0xa47/0x1200 kernel/workqueue.c:2439 kthread+0x28d/0x320 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 ================================================================ Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 68d57a07bfe5 ("wireless: add plfxlc driver for pureLiFi X, XL, XC devices") Reported-by: syzbot+7d4f142f6c288de8abfe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7d4f142f6c288de8abfe Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250321185226.71-2-m.masimov@mt-integration.ru Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/purelifi/plfxlc/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index 506d2f31efb5..7ebc0df0944c 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -103,7 +103,6 @@ int plfxlc_mac_init_hw(struct ieee80211_hw *hw) void plfxlc_mac_release(struct plfxlc_mac *mac) { plfxlc_chip_release(&mac->chip); - lockdep_assert_held(&mac->lock); } int plfxlc_op_start(struct ieee80211_hw *hw) From 029f11349ec97c083a7ed97b3eeba590f91e329d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 23 Apr 2025 13:51:54 +0530 Subject: [PATCH 052/129] powerpc/boot: Fix dash warning [ Upstream commit e3f506b78d921e48a00d005bea5c45ec36a99240 ] 'commit b2accfe7ca5b ("powerpc/boot: Check for ld-option support")' suppressed linker warnings, but the expressed used did not go well with POSIX shell (dash) resulting with this warning arch/powerpc/boot/wrapper: 237: [: 0: unexpected operator ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions Fix the check to handle the reported warning. Patch also fixes couple of shellcheck reported errors for the same line. In arch/powerpc/boot/wrapper line 237: if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then ^-- SC2046 (warning): Quote this to prevent word splitting. ^------^ SC2086 (info): Double quote to prevent globbing and word splitting. ^---------^ SC3020 (warning): In POSIX sh, &> is undefined. Fixes: b2accfe7ca5b ("powerpc/boot: Check for ld-option support") Reported-by: Stephen Rothwell Suggested-by: Stephen Rothwell Tested-by: Venkat Rao Bagalkote Reviewed-by: Stephen Rothwell Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250423082154.30625-1-maddy@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/boot/wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index fea9694f1047..ddb02cf0caaf 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,7 +234,7 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then +if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then nowarn="$nowarn --no-warn-rwx-segments" fi From 3576e9a80b6c4381b01ce0cbaa07f5e92d4492ed Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 23 Apr 2025 17:51:31 +0300 Subject: [PATCH 053/129] vxlan: vnifilter: Fix unlocked deletion of default FDB entry [ Upstream commit 087a9eb9e5978e3ba362e1163691e41097e8ca20 ] When a VNI is deleted from a VXLAN device in 'vnifilter' mode, the FDB entry associated with the default remote (assuming one was configured) is deleted without holding the hash lock. This is wrong and will result in a warning [1] being generated by the lockdep annotation that was added by commit ebe642067455 ("vxlan: Create wrappers for FDB lookup"). Reproducer: # ip link add vx0 up type vxlan dstport 4789 external vnifilter local 192.0.2.1 # bridge vni add vni 10010 remote 198.51.100.1 dev vx0 # bridge vni del vni 10010 dev vx0 Fix by acquiring the hash lock before the deletion and releasing it afterwards. Blame the original commit that introduced the issue rather than the one that exposed it. [1] WARNING: CPU: 3 PID: 392 at drivers/net/vxlan/vxlan_core.c:417 vxlan_find_mac+0x17f/0x1a0 [...] RIP: 0010:vxlan_find_mac+0x17f/0x1a0 [...] Call Trace: __vxlan_fdb_delete+0xbe/0x560 vxlan_vni_delete_group+0x2ba/0x940 vxlan_vni_del.isra.0+0x15f/0x580 vxlan_process_vni_filter+0x38b/0x7b0 vxlan_vnifilter_process+0x3bb/0x510 rtnetlink_rcv_msg+0x2f7/0xb70 netlink_rcv_skb+0x131/0x360 netlink_unicast+0x426/0x710 netlink_sendmsg+0x75a/0xc20 __sock_sendmsg+0xc1/0x150 ____sys_sendmsg+0x5aa/0x7b0 ___sys_sendmsg+0xfc/0x180 __sys_sendmsg+0x121/0x1b0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250423145131.513029-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_vnifilter.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 6e6e9f05509a..06d19e90eadb 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -627,7 +627,11 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, * default dst remote_ip previously added for this vni */ if (!vxlan_addr_any(&vninode->remote_ip) || - !vxlan_addr_any(&dst->remote_ip)) + !vxlan_addr_any(&dst->remote_ip)) { + u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, + vninode->vni); + + spin_lock_bh(&vxlan->hash_lock[hash_index]); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), @@ -635,6 +639,8 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, vninode->vni, vninode->vni, dst->remote_ifindex, true); + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + } if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && From bab395c8912766c8ebd107b2887d2dae8c8a92fb Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 23 Apr 2025 11:36:08 +0300 Subject: [PATCH 054/129] net/mlx5: E-Switch, Initialize MAC Address for Default GID [ Upstream commit 5d1a04f347e6cbf5ffe74da409a5d71fbe8c5f19 ] Initialize the source MAC address when creating the default GID entry. Since this entry is used only for loopback traffic, it only needs to be a unicast address. A zeroed-out MAC address is sufficient for this purpose. Without this fix, random bits would be assigned as the source address. If these bits formed a multicast address, the firmware would return an error, preventing the user from switching to switchdev mode: Error: mlx5_core: Failed setting eswitch to offloads. kernel answers: Invalid argument Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index a42f6cd99b74..f585ef5a3424 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -118,8 +118,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) { + u8 mac[ETH_ALEN] = {}; union ib_gid gid; - u8 mac[ETH_ALEN]; mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, From dd20a33a0db001bf975eedb06fa19a5ff9b27891 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 23 Apr 2025 11:36:11 +0300 Subject: [PATCH 055/129] net/mlx5: E-switch, Fix error handling for enabling roce [ Upstream commit 90538d23278a981e344d364e923162fce752afeb ] The cited commit assumes enabling roce always succeeds. But it is not true. Add error handling for it. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/rdma.h | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7eba3a5bb97c..326c72b3df86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3499,7 +3499,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); + err = mlx5_rdma_enable_roce(esw->dev); + if (err) + goto err_roce; err = mlx5_esw_host_number_init(esw); if (err) @@ -3560,6 +3562,7 @@ err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: mlx5_rdma_disable_roce(esw->dev); +err_roce: mutex_destroy(&esw->offloads.termtbl_mutex); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index f585ef5a3424..5c552b71e371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -140,17 +140,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) mlx5_nic_vport_disable_roce(dev); } -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; if (!MLX5_CAP_GEN(dev, roce)) - return; + return 0; err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); - return; + return err; } err = mlx5_rdma_add_roce_addr(dev); @@ -165,10 +165,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) goto del_roce_addr; } - return; + return err; del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h index 750cff2a71a4..3d9e76c3d42f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -8,12 +8,12 @@ #ifdef CONFIG_MLX5_ESWITCH -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ From 2e8d44ebaa7babdd5c5ab50ca275826e241920d6 Mon Sep 17 00:00:00 2001 From: En-Wei Wu Date: Mon, 21 Apr 2025 21:00:37 +0800 Subject: [PATCH 056/129] Bluetooth: btusb: avoid NULL pointer dereference in skb_dequeue() [ Upstream commit 0317b033abcd1d8dd2798f0e2de5e84543d0bd22 ] A NULL pointer dereference can occur in skb_dequeue() when processing a QCA firmware crash dump on WCN7851 (0489:e0f3). [ 93.672166] Bluetooth: hci0: ACL memdump size(589824) [ 93.672475] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 93.672517] Workqueue: hci0 hci_devcd_rx [bluetooth] [ 93.672598] RIP: 0010:skb_dequeue+0x50/0x80 The issue stems from handle_dump_pkt_qca() returning 0 even when a dump packet is successfully processed. This is because it incorrectly forwards the return value of hci_devcd_init() (which returns 0 on success). As a result, the caller (btusb_recv_acl_qca() or btusb_recv_evt_qca()) assumes the packet was not handled and passes it to hci_recv_frame(), leading to premature kfree() of the skb. Later, hci_devcd_rx() attempts to dequeue the same skb from the dump queue, resulting in a NULL pointer dereference. Fix this by: 1. Making handle_dump_pkt_qca() return 0 on success and negative errno on failure, consistent with kernel conventions. 2. Splitting dump packet detection into separate functions for ACL and event packets for better structure and readability. This ensures dump packets are properly identified and consumed, avoiding double handling and preventing NULL pointer access. Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: En-Wei Wu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 101 +++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index bc3f63f1ccd8..d6195565ef7a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3521,22 +3521,16 @@ static void btusb_coredump_qca(struct hci_dev *hdev) bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } -/* - * ==0: not a dump pkt. - * < 0: fails to handle a dump pkt - * > 0: otherwise. - */ +/* Return: 0 on success, negative errno on failure. */ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - int ret = 1; + int ret = 0; u8 pkt_type; u8 *sk_ptr; unsigned int sk_len; u16 seqno; u32 dump_size; - struct hci_event_hdr *event_hdr; - struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; @@ -3546,30 +3540,14 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) sk_len = skb->len; if (pkt_type == HCI_ACLDATA_PKT) { - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) - return 0; sk_ptr += HCI_ACL_HDR_SIZE; sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; - } else { - event_hdr = hci_event_hdr(skb); } - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return 0; - sk_ptr += HCI_EVENT_HDR_SIZE; sk_len -= HCI_EVENT_HDR_SIZE; dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) - || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) - || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return 0; - - /*it is dump pkt now*/ seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3643,17 +3621,84 @@ out: return ret; } +/* Return: true if the ACL packet is a dump packet, false otherwise. */ +static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 *sk_ptr; + unsigned int sk_len; + + struct hci_event_hdr *event_hdr; + struct hci_acl_hdr *acl_hdr; + struct qca_dump_hdr *dump_hdr; + + sk_ptr = skb->data; + sk_len = skb->len; + + acl_hdr = hci_acl_hdr(skb); + if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) + return false; + + sk_ptr += HCI_ACL_HDR_SIZE; + sk_len -= HCI_ACL_HDR_SIZE; + event_hdr = (struct hci_event_hdr *)sk_ptr; + + if ((event_hdr->evt != HCI_VENDOR_PKT) || + (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + return false; + + sk_ptr += HCI_EVENT_HDR_SIZE; + sk_len -= HCI_EVENT_HDR_SIZE; + + dump_hdr = (struct qca_dump_hdr *)sk_ptr; + if ((sk_len < offsetof(struct qca_dump_hdr, data)) || + (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + return false; + + return true; +} + +/* Return: true if the event packet is a dump packet, false otherwise. */ +static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 *sk_ptr; + unsigned int sk_len; + + struct hci_event_hdr *event_hdr; + struct qca_dump_hdr *dump_hdr; + + sk_ptr = skb->data; + sk_len = skb->len; + + event_hdr = hci_event_hdr(skb); + + if ((event_hdr->evt != HCI_VENDOR_PKT) + || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + return false; + + sk_ptr += HCI_EVENT_HDR_SIZE; + sk_len -= HCI_EVENT_HDR_SIZE; + + dump_hdr = (struct qca_dump_hdr *)sk_ptr; + if ((sk_len < offsetof(struct qca_dump_hdr, data)) || + (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + return false; + + return true; +} + static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (acl_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } static int btusb_recv_evt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (evt_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } From 8dde02229b3c782102ee9ef629e93c0c4f3219b6 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:32 -0700 Subject: [PATCH 057/129] net: Rename mono_delivery_time to tstamp_type for scalabilty [ Upstream commit 4d25ca2d6801cfcf26f7f39c561611ba5be99bf8 ] mono_delivery_time was added to check if skb->tstamp has delivery time in mono clock base (i.e. EDT) otherwise skb->tstamp has timestamp in ingress and delivery_time at egress. Renaming the bitfield from mono_delivery_time to tstamp_type is for extensibilty for other timestamps such as userspace timestamp (i.e. SO_TXTIME) set via sock opts. As we are renaming the mono_delivery_time to tstamp_type, it makes sense to start assigning tstamp_type based on enum defined in this commit. Earlier we used bool arg flag to check if the tstamp is mono in function skb_set_delivery_time, Now the signature of the functions accepts tstamp_type to distinguish between mono and real time. Also skb_set_delivery_type_by_clockid is a new function which accepts clockid to determine the tstamp_type. In future tstamp_type:1 can be extended to support userspace timestamp by increasing the bitfield. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau Stable-dep-of: 3908feb1bd7f ("Bluetooth: L2CAP: copy RX timestamp to new fragments") Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 52 ++++++++++++++++------ include/net/inet_frag.h | 4 +- net/bridge/netfilter/nf_conntrack_bridge.c | 6 +-- net/core/dev.c | 2 +- net/core/filter.c | 10 ++--- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 9 ++-- net/ipv4/tcp_output.c | 14 +++--- net/ipv6/ip6_output.c | 6 +-- net/ipv6/netfilter.c | 6 +-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/sched/act_bpf.c | 4 +- net/sched/cls_bpf.c | 4 +- 17 files changed, 78 insertions(+), 51 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5f11f9873341..f7d392d849be 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -685,6 +685,11 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +enum skb_tstamp_type { + SKB_CLOCK_REALTIME, + SKB_CLOCK_MONOTONIC, +}; + /** * DOC: Basic sk_buff geometry * @@ -804,10 +809,8 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required - * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * @tstamp_type: When set, skb->tstamp has the + * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -935,7 +938,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ + __u8 tstamp_type:1; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -4189,7 +4192,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) @@ -4198,10 +4201,33 @@ static inline ktime_t net_timedelta(ktime_t t) } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, - bool mono) + u8 tstamp_type) { skb->tstamp = kt; - skb->mono_delivery_time = kt && mono; + + if (kt) + skb->tstamp_type = tstamp_type; + else + skb->tstamp_type = SKB_CLOCK_REALTIME; +} + +static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, + ktime_t kt, clockid_t clockid) +{ + u8 tstamp_type = SKB_CLOCK_REALTIME; + + switch (clockid) { + case CLOCK_REALTIME: + break; + case CLOCK_MONOTONIC: + tstamp_type = SKB_CLOCK_MONOTONIC; + break; + default: + WARN_ON_ONCE(1); + kt = 0; + } + + skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4211,8 +4237,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { - if (skb->mono_delivery_time) { - skb->mono_delivery_time = 0; + if (skb->tstamp_type) { + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else @@ -4222,7 +4248,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb) static inline void skb_clear_tstamp(struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return; skb->tstamp = 0; @@ -4230,7 +4256,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) static inline ktime_t skb_tstamp(const struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return 0; return skb->tstamp; @@ -4238,7 +4264,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { - if (!skb->mono_delivery_time && skb->tstamp) + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 153960663ce4..5af6eb14c5db 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -76,7 +76,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far - * @mono_delivery_time: stamp has a mono delivery time (EDT) + * @tstamp_type: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -97,7 +97,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; - u8 mono_delivery_time; + u8 tstamp_type; __u8 flags; u16 max_size; struct fqdir *fqdir; diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 6ef04f9fe481..4fbfbafdfa02 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; unsigned int hlen, ll_rs, mtu; ktime_t tstamp = skb->tstamp; struct ip_frag_state state; @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, if (iter.frag) ip_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -113,7 +113,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/core/dev.c b/net/core/dev.c index c31a7f7bedf3..4006fd164b7b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2189,7 +2189,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); } diff --git a/net/core/filter.c b/net/core/filter.c index c4c27e03279c..066277b91a1b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7734,13 +7734,13 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, if (!tstamp) return -EINVAL; skb->tstamp = tstamp; - skb->mono_delivery_time = 1; + skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; case BPF_SKB_TSTAMP_UNSPEC: if (tstamp) return -EINVAL; skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; break; default: return -EINVAL; @@ -9438,7 +9438,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); - /* skb->tc_at_ingress && skb->mono_delivery_time, + /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ *insn++ = BPF_MOV64_IMM(value_reg, 0); @@ -9463,7 +9463,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, * the bpf prog is aware the tstamp could have delivery time. * Thus, write skb->tstamp as is if tstamp_type_access is true. * Otherwise, writing at ingress will have to clear the - * mono_delivery_time bit also. + * skb->tstamp_type bit also. */ if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; @@ -9473,7 +9473,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto */ *insn++ = BPF_JMP_A(2); - /* : mono_delivery_time */ + /* : skb->tstamp_type */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6dd960ec558c..ba0455ad7701 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, goto err; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c88c9034d630..496308c02384 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; - head->mono_delivery_time = q->mono_delivery_time; + head->tstamp_type = q->tstamp_type; if (sk) refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 877d1e03150c..484edc8513e4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -360,7 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; - qp->q.mono_delivery_time = skb->mono_delivery_time; + qp->q.tstamp_type = skb->tstamp_type; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 765bd3f2a840..b8cfe6afc84b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; struct sk_buff *skb2; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; @@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, } } - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) @@ -912,7 +912,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, skb2); if (err) goto fail; @@ -1648,7 +1648,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - nskb->mono_delivery_time = !!transmit_time; + if (transmit_time) + nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); ip_push_pending_frames(sk, &fl4); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3771ed22c2f5..560273e7f773 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1266,7 +1266,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp = tcp_sk(sk); prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); if (clone_it) { oskb = skb; @@ -1607,7 +1607,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, skb_split(skb, buff, len); - skb_set_delivery_time(buff, skb->tstamp, true); + skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC); tcp_fragment_tstamp(skb, buff); old_factor = tcp_skb_pcount(skb); @@ -2703,7 +2703,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ tp->tcp_wstamp_ns = tp->tcp_clock_cache; - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ @@ -3688,11 +3688,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) skb_set_delivery_time(skb, cookie_init_timestamp(req, now), - true); + SKB_CLOCK_MONOTONIC); else #endif { - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3741,7 +3741,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts); - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); tcp_add_tx_delay(skb, tp); return skb; @@ -3923,7 +3923,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true); + skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC); /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd89a2b35dfb..c86d5dca29df 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -864,7 +864,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; @@ -958,7 +958,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -1019,7 +1019,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(frag, tstamp, mono_delivery_time); + skb_set_delivery_time(frag, tstamp, tstamp_type); err = output(net, sk, frag); if (err) goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 857713d7a38a..7c4af48d529e 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -126,7 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; @@ -192,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -225,7 +225,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c78b13ea5b19..82e51b2ec4f5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -268,7 +268,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2af98edef87e..cb219d4bdf25 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f285e52b8b85..624ab1424eba 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -934,7 +934,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); - skb_set_delivery_time(buff, tcp_transmit_time(sk), true); + skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b0455fda7d0b..ac87fcff4795 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 382c7a71f81f..db7151c6b70b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (prog->exts_integrated) { res->class = 0; From 8c7bfb6198a7f2489a149b81178f1cb3750c5d3a Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 24 Apr 2025 22:51:03 +0300 Subject: [PATCH 058/129] Bluetooth: L2CAP: copy RX timestamp to new fragments [ Upstream commit 3908feb1bd7f319a10e18d84369a48163264cc7d ] Copy timestamp too when allocating new skb for received fragment. Fixes missing RX timestamps with fragmentation. Fixes: 4d7ea8ee90e4 ("Bluetooth: L2CAP: Fix handling fragmented length") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d4dcdb2370cc..72ee41b894a5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7386,6 +7386,9 @@ static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb, return -ENOMEM; /* Init rx_len */ conn->rx_len = len; + + skb_set_delivery_time(conn->rx_skb, skb->tstamp, + skb->tstamp_type); } /* Copy as much as the rx_skb can hold */ From dc7ffe02adda25de57d3075fedabc4d1faabd726 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:07 +0300 Subject: [PATCH 059/129] net: mscc: ocelot: treat 802.1ad tagged traffic as 802.1Q-untagged [ Upstream commit 36dd1141be70b5966906919714dc504a24c65ddf ] I was revisiting the topic of 802.1ad treatment in the Ocelot switch [0] and realized that not only is its basic VLAN classification pipeline improper for offloading vlan_protocol 802.1ad bridges, but also improper for offloading regular 802.1Q bridges already. Namely, 802.1ad-tagged traffic should be treated as VLAN-untagged by bridged ports, but this switch treats it as if it was 802.1Q-tagged with the same VID as in the 802.1ad header. This is markedly different to what the Linux bridge expects; see the "other_tpid()" function in tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh. An idea came to me that the VCAP IS1 TCAM is more powerful than I'm giving it credit for, and that it actually overwrites the classified VID before the VLAN Table lookup takes place. In other words, it can be used even to save a packet from being dropped on ingress due to VLAN membership. Add a sophisticated TCAM rule hardcoded into the driver to force the switch to behave like a Linux bridge with vlan_filtering 1 vlan_protocol 802.1Q. Regarding the lifetime of the filter: eventually the bridge will disappear, and vlan_filtering on the port will be restored to 0 for standalone mode. Then the filter will be deleted. [0]: https://lore.kernel.org/netdev/20201009122947.nvhye4hvcha3tljh@skbuf/ Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller Stable-dep-of: 5ec6d7d737a4 ("net: mscc: ocelot: delete PVID VLAN when readding it as non-PVID") Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 188 ++++++++++++++++++++++-- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 + include/soc/mscc/ocelot_vcap.h | 2 + 3 files changed, 180 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index f6aa5d6b6597..d3e7aff5a794 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -453,9 +453,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on C-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. + * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. + */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -475,15 +624,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. */ if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -631,7 +788,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -684,9 +844,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -712,8 +875,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -2607,7 +2773,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a3..5734b86aed5b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { From a74777bb18278bf70f6f15a863f34a4474504ae8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:33 +0300 Subject: [PATCH 060/129] net: mscc: ocelot: delete PVID VLAN when readding it as non-PVID [ Upstream commit 5ec6d7d737a491256cd37e33910f7ac1978db591 ] The following set of commands: ip link add br0 type bridge vlan_filtering 1 # vlan_default_pvid 1 is implicit ip link set swp0 master br0 bridge vlan add dev swp0 vid 1 should result in the dropping of untagged and 802.1p-tagged traffic, but we see that it continues to be accepted. Whereas, had we deleted VID 1 instead, the aforementioned dropping would have worked This is because the ANA_PORT_DROP_CFG update logic doesn't run, because ocelot_vlan_add() only calls ocelot_port_set_pvid() if the new VLAN has the BRIDGE_VLAN_INFO_PVID flag. Similar to other drivers like mt7530_port_vlan_add() which handle this case correctly, we need to test whether the VLAN we're changing used to have the BRIDGE_VLAN_INFO_PVID flag, but lost it now. That amounts to a PVID deletion and should be treated as such. Regarding blame attribution: this never worked properly since the introduction of bridge VLAN filtering in commit 7142529f1688 ("net: mscc: ocelot: add VLAN filtering"). However, there was a significant paradigm shift which aligned the ANA_PORT_DROP_CFG register with the PVID concept rather than with the native VLAN concept, and that change wasn't targeted for 'stable'. Realistically, that is as far as this fix needs to be propagated to. Fixes: be0576fed6d3 ("net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250424223734.3096202-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index d3e7aff5a794..252d8e6f18c3 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -830,6 +830,7 @@ EXPORT_SYMBOL(ocelot_vlan_prepare); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; /* Ignore VID 0 added to our RX filter by the 8021q module, since @@ -849,6 +850,11 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, ocelot_bridge_vlan_find(ocelot, vid)); if (err) return err; + } else if (ocelot_port->pvid_vlan && + ocelot_bridge_vlan_find(ocelot, vid) == ocelot_port->pvid_vlan) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; } /* Untagged egress vlan clasification */ From 7cb10f17bddc415f30fbc00a4e2b490e0d94c462 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:48 +0200 Subject: [PATCH 061/129] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll [ Upstream commit 6fe0866014486736cc3ba1c6fd4606d3dbe55c9c ] Use spin_lock_irqsave and spin_unlock_irqrestore instead of spin_lock and spin_unlock in mtk_star_emac driver to avoid spinlock recursion occurrence that can happen when enabling the DMA interrupts again in rx/tx poll. ``` BUG: spinlock recursion on CPU#0, swapper/0/0 lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT Hardware name: MediaTek MT8365 Open Platform EVK (DT) Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x60/0x80 dump_stack+0x18/0x24 spin_dump+0x78/0x88 do_raw_spin_lock+0x11c/0x120 _raw_spin_lock+0x20/0x2c mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac] __handle_irq_event_percpu+0x48/0x140 handle_irq_event+0x4c/0xb0 handle_fasteoi_irq+0xa0/0x1bc handle_irq_desc+0x34/0x58 generic_handle_domain_irq+0x1c/0x28 gic_handle_irq+0x4c/0x120 do_interrupt_handler+0x50/0x84 el1_interrupt+0x34/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 regmap_mmio_read32le+0xc/0x20 (P) _regmap_bus_reg_read+0x6c/0xac _regmap_read+0x60/0xdc regmap_read+0x4c/0x80 mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac] __napi_poll+0x38/0x188 net_rx_action+0x164/0x2c0 handle_softirqs+0x100/0x244 __do_softirq+0x14/0x20 ____do_softirq+0x10/0x20 call_on_irq_stack+0x24/0x64 do_softirq_own_stack+0x1c/0x40 __irq_exit_rcu+0xd4/0x10c irq_exit_rcu+0x10/0x1c el1_interrupt+0x38/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 cpuidle_enter_state+0xac/0x320 (P) cpuidle_enter+0x38/0x50 do_idle+0x1e4/0x260 cpu_startup_entry+0x34/0x3c rest_init+0xdc/0xe0 console_on_rootfs+0x0/0x6c __primary_switched+0x88/0x90 ``` Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx handling with two NAPIs") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: Maxime Chevallier Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-1-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 25989c79c92e..47a00e02365a 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1163,6 +1163,7 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) struct net_device *ndev = priv->ndev; unsigned int head = ring->head; unsigned int entry = ring->tail; + unsigned long flags; while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) { ret = mtk_star_tx_complete_one(priv); @@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) netif_wake_queue(ndev); if (napi_complete(napi)) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, false, true); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return 0; @@ -1341,6 +1342,7 @@ push_new_skb: static int mtk_star_rx_poll(struct napi_struct *napi, int budget) { struct mtk_star_priv *priv; + unsigned long flags; int work_done = 0; priv = container_of(napi, struct mtk_star_priv, rx_napi); @@ -1348,9 +1350,9 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) work_done = mtk_star_rx(priv, budget); if (work_done < budget) { napi_complete_done(napi, work_done); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return work_done; From 5ba9380f79f288c490e655ddaa3fddffa4072463 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:49 +0200 Subject: [PATCH 062/129] net: ethernet: mtk-star-emac: rearm interrupts in rx_poll only when advised [ Upstream commit e54b4db35e201a9173da9cb7abc8377e12abaf87 ] In mtk_star_rx_poll function, on event processing completion, the mtk_star_emac driver calls napi_complete_done but ignores its return code and enable RX DMA interrupts inconditionally. This return code gives the info if a device should avoid rearming its interrupts or not, so fix this behaviour by taking it into account. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Louis-Alexis Eyraud Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-2-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 47a00e02365a..c2ab87828d85 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1348,8 +1348,7 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) priv = container_of(napi, struct mtk_star_priv, rx_napi); work_done = mtk_star_rx(priv, budget); - if (work_done < budget) { - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); spin_unlock_irqrestore(&priv->lock, flags); From 5e51c0b51b25e4a21d6b52e34cddde76ebe8dea2 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 14 Sep 2023 15:31:57 -0700 Subject: [PATCH 063/129] pds_core: check health in devcmd wait [ Upstream commit f7b5bd725b737de3f2c4a836e07c82ba156d75df ] Similar to what we do in the AdminQ, check for devcmd health while waiting for an answer. Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Signed-off-by: David S. Miller Stable-dep-of: dfd76010f8e8 ("pds_core: remove write-after-free of client_id") Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/dev.c | 11 +++++++++-- include/linux/pds/pds_core_if.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f0e39ab40045..e65a1632df50 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -42,6 +42,8 @@ int pdsc_err_to_errno(enum pds_core_status_code code) return -ERANGE; case PDS_RC_BAD_ADDR: return -EFAULT; + case PDS_RC_BAD_PCI: + return -ENXIO; case PDS_RC_EOPCODE: case PDS_RC_EINTR: case PDS_RC_DEV_CMD: @@ -65,7 +67,7 @@ bool pdsc_is_fw_running(struct pdsc *pdsc) /* Firmware is useful only if the running bit is set and * fw_status != 0xff (bad PCI read) */ - return (pdsc->fw_status != 0xff) && + return (pdsc->fw_status != PDS_RC_BAD_PCI) && (pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING); } @@ -131,6 +133,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) unsigned long max_wait; unsigned long duration; int timeout = 0; + bool running; int done = 0; int err = 0; int status; @@ -139,6 +142,10 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) max_wait = start_time + (max_seconds * HZ); while (!done && !timeout) { + running = pdsc_is_fw_running(pdsc); + if (!running) + break; + done = pdsc_devcmd_done(pdsc); if (done) break; @@ -155,7 +162,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) dev_dbg(dev, "DEVCMD %d %s after %ld secs\n", opcode, pdsc_devcmd_str(opcode), duration / HZ); - if (!done || timeout) { + if ((!done || timeout) && running) { dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n", opcode, pdsc_devcmd_str(opcode), done, timeout, max_seconds); diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** From 452fa190dd2e4751cc7244c1e39565fb4a262b7d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 16 Feb 2024 14:29:51 -0800 Subject: [PATCH 064/129] pds_core: delete VF dev on reset [ Upstream commit 2dac60e062340c1e5c975ad6465192d11c40d47a ] When the VF is hit with a reset, remove the aux device in the prepare for reset and try to restore it after the reset. The userland mechanics will need to recover and rebuild whatever uses the device afterwards. Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller Stable-dep-of: dfd76010f8e8 ("pds_core: remove write-after-free of client_id") Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/auxbus.c | 18 +++++++++++++++++- drivers/net/ethernet/amd/pds_core/main.c | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index fb7a5403e630..b76a9b7e0aed 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -177,6 +177,9 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) struct pds_auxiliary_dev *padev; int err = 0; + if (!cf) + return -ENODEV; + mutex_lock(&pf->config_lock); padev = pf->vfs[cf->vf_id].padev; @@ -195,14 +198,27 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) { struct pds_auxiliary_dev *padev; - enum pds_core_vif_types vt; char devname[PDS_DEVNAME_LEN]; + enum pds_core_vif_types vt; + unsigned long mask; u16 vt_support; int client_id; int err = 0; + if (!cf) + return -ENODEV; + mutex_lock(&pf->config_lock); + mask = BIT_ULL(PDSC_S_FW_DEAD) | + BIT_ULL(PDSC_S_STOPPING_DRIVER); + if (cf->state & mask) { + dev_err(pf->dev, "%s: can't add dev, VF client in bad state %#lx\n", + __func__, cf->state); + err = -ENXIO; + goto out_unlock; + } + /* We only support vDPA so far, so it is the only one to * be verified that it is available in the Core device and * enabled in the devlink param. In the future this might diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index eddbf0acdde7..346a69e95c88 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -475,6 +475,14 @@ static void pdsc_reset_prepare(struct pci_dev *pdev) pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); + if (pdev->is_virtfn) { + struct pdsc *pf; + + pf = pdsc_get_pf_struct(pdsc->pdev); + if (!IS_ERR(pf)) + pdsc_auxbus_dev_del(pdsc, pf); + } + pdsc_unmap_bars(pdsc); pci_release_regions(pdev); pci_disable_device(pdev); @@ -510,6 +518,14 @@ static void pdsc_reset_done(struct pci_dev *pdev) pdsc_fw_up(pdsc); pdsc_restart_health_thread(pdsc); + + if (pdev->is_virtfn) { + struct pdsc *pf; + + pf = pdsc_get_pf_struct(pdsc->pdev); + if (!IS_ERR(pf)) + pdsc_auxbus_dev_add(pdsc, pf); + } } static const struct pci_error_handlers pdsc_err_handler = { From da23d7edb2158bcc104e7ea91ac1f79a469a31b0 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 20 Mar 2025 12:44:07 -0700 Subject: [PATCH 065/129] pds_core: make pdsc_auxbus_dev_del() void [ Upstream commit e8562da829432d04a0de1830146984c89844f35e ] Since there really is no useful return, advertising a return value is rather misleading. Make pdsc_auxbus_dev_del() a void function. Link: https://patch.msgid.link/r/20250320194412.67983-2-shannon.nelson@amd.com Reviewed-by: Leon Romanovsky Reviewed-by: Jonathan Cameron Reviewed-by: Kalesh AP Reviewed-by: Dave Jiang Signed-off-by: Shannon Nelson Signed-off-by: Jason Gunthorpe Stable-dep-of: dfd76010f8e8 ("pds_core: remove write-after-free of client_id") Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/auxbus.c | 7 +------ drivers/net/ethernet/amd/pds_core/core.h | 2 +- drivers/net/ethernet/amd/pds_core/devlink.c | 6 ++++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index b76a9b7e0aed..d53b2124b149 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -172,13 +172,9 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, return padev; } -int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) +void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) { struct pds_auxiliary_dev *padev; - int err = 0; - - if (!cf) - return -ENODEV; mutex_lock(&pf->config_lock); @@ -192,7 +188,6 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) pf->vfs[cf->vf_id].padev = NULL; mutex_unlock(&pf->config_lock); - return err; } int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 858bebf79776..480f9e8cbc4d 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -301,7 +301,7 @@ int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); void pdsc_notify(unsigned long event, void *data); int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf); -int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf); +void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf); void pdsc_process_adminq(struct pdsc_qcq *qcq); void pdsc_work_thread(struct work_struct *work); diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 0032e8e35181..9b77bb73d25d 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -55,8 +55,10 @@ int pdsc_dl_enable_set(struct devlink *dl, u32 id, for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) { struct pdsc *vf = pdsc->vfs[vf_id].vf; - err = ctx->val.vbool ? pdsc_auxbus_dev_add(vf, pdsc) : - pdsc_auxbus_dev_del(vf, pdsc); + if (ctx->val.vbool) + err = pdsc_auxbus_dev_add(vf, pdsc); + else + pdsc_auxbus_dev_del(vf, pdsc); } return err; From 0861fccd43b8bafb533d97308862d20b7db3a2ad Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 20 Mar 2025 12:44:08 -0700 Subject: [PATCH 066/129] pds_core: specify auxiliary_device to be created [ Upstream commit b699bdc720c0255d1bb76cecba7382c1f2107af5 ] In preparation for adding a new auxiliary_device for the PF, make the vif type an argument to pdsc_auxbus_dev_add(). Pass in the address of the padev pointer so that the caller can specify where to save it and keep the mutex usage within the function. Link: https://patch.msgid.link/r/20250320194412.67983-3-shannon.nelson@amd.com Reviewed-by: Leon Romanovsky Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Shannon Nelson Signed-off-by: Jason Gunthorpe Stable-dep-of: dfd76010f8e8 ("pds_core: remove write-after-free of client_id") Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/auxbus.c | 37 ++++++++++----------- drivers/net/ethernet/amd/pds_core/core.h | 7 ++-- drivers/net/ethernet/amd/pds_core/devlink.c | 5 +-- drivers/net/ethernet/amd/pds_core/main.c | 11 +++--- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index d53b2124b149..4d3387bebe6a 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -172,29 +172,32 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, return padev; } -void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) +void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, + struct pds_auxiliary_dev **pd_ptr) { struct pds_auxiliary_dev *padev; + if (!*pd_ptr) + return; + mutex_lock(&pf->config_lock); - padev = pf->vfs[cf->vf_id].padev; - if (padev) { - pds_client_unregister(pf, padev->client_id); - auxiliary_device_delete(&padev->aux_dev); - auxiliary_device_uninit(&padev->aux_dev); - padev->client_id = 0; - } - pf->vfs[cf->vf_id].padev = NULL; + padev = *pd_ptr; + pds_client_unregister(pf, padev->client_id); + auxiliary_device_delete(&padev->aux_dev); + auxiliary_device_uninit(&padev->aux_dev); + padev->client_id = 0; + *pd_ptr = NULL; mutex_unlock(&pf->config_lock); } -int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) +int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf, + enum pds_core_vif_types vt, + struct pds_auxiliary_dev **pd_ptr) { struct pds_auxiliary_dev *padev; char devname[PDS_DEVNAME_LEN]; - enum pds_core_vif_types vt; unsigned long mask; u16 vt_support; int client_id; @@ -203,6 +206,9 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) if (!cf) return -ENODEV; + if (vt >= PDS_DEV_TYPE_MAX) + return -EINVAL; + mutex_lock(&pf->config_lock); mask = BIT_ULL(PDSC_S_FW_DEAD) | @@ -214,17 +220,10 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) goto out_unlock; } - /* We only support vDPA so far, so it is the only one to - * be verified that it is available in the Core device and - * enabled in the devlink param. In the future this might - * become a loop for several VIF types. - */ - /* Verify that the type is supported and enabled. It is not * an error if there is no auxbus device support for this * VF, it just means something else needs to happen with it. */ - vt = PDS_DEV_TYPE_VDPA; vt_support = !!le16_to_cpu(pf->dev_ident.vif_types[vt]); if (!(vt_support && pf->viftype_status[vt].supported && @@ -250,7 +249,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) err = PTR_ERR(padev); goto out_unlock; } - pf->vfs[cf->vf_id].padev = padev; + *pd_ptr = padev; out_unlock: mutex_unlock(&pf->config_lock); diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 480f9e8cbc4d..61ee607ee48a 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -300,8 +300,11 @@ void pdsc_health_thread(struct work_struct *work); int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); void pdsc_notify(unsigned long event, void *data); -int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf); -void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf); +int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf, + enum pds_core_vif_types vt, + struct pds_auxiliary_dev **pd_ptr); +void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, + struct pds_auxiliary_dev **pd_ptr); void pdsc_process_adminq(struct pdsc_qcq *qcq); void pdsc_work_thread(struct work_struct *work); diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 9b77bb73d25d..bee70e46e34c 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -56,9 +56,10 @@ int pdsc_dl_enable_set(struct devlink *dl, u32 id, struct pdsc *vf = pdsc->vfs[vf_id].vf; if (ctx->val.vbool) - err = pdsc_auxbus_dev_add(vf, pdsc); + err = pdsc_auxbus_dev_add(vf, pdsc, vt_entry->vif_id, + &pdsc->vfs[vf_id].padev); else - pdsc_auxbus_dev_del(vf, pdsc); + pdsc_auxbus_dev_del(vf, pdsc, &pdsc->vfs[vf_id].padev); } return err; diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 346a69e95c88..76652e0e5b6d 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -189,7 +189,8 @@ static int pdsc_init_vf(struct pdsc *vf) devl_unlock(dl); pf->vfs[vf->vf_id].vf = vf; - err = pdsc_auxbus_dev_add(vf, pf); + err = pdsc_auxbus_dev_add(vf, pf, PDS_DEV_TYPE_VDPA, + &pf->vfs[vf->vf_id].padev); if (err) { devl_lock(dl); devl_unregister(dl); @@ -415,7 +416,7 @@ static void pdsc_remove(struct pci_dev *pdev) pf = pdsc_get_pf_struct(pdsc->pdev); if (!IS_ERR(pf)) { - pdsc_auxbus_dev_del(pdsc, pf); + pdsc_auxbus_dev_del(pdsc, pf, &pf->vfs[pdsc->vf_id].padev); pf->vfs[pdsc->vf_id].vf = NULL; } } else { @@ -480,7 +481,8 @@ static void pdsc_reset_prepare(struct pci_dev *pdev) pf = pdsc_get_pf_struct(pdsc->pdev); if (!IS_ERR(pf)) - pdsc_auxbus_dev_del(pdsc, pf); + pdsc_auxbus_dev_del(pdsc, pf, + &pf->vfs[pdsc->vf_id].padev); } pdsc_unmap_bars(pdsc); @@ -524,7 +526,8 @@ static void pdsc_reset_done(struct pci_dev *pdev) pf = pdsc_get_pf_struct(pdsc->pdev); if (!IS_ERR(pf)) - pdsc_auxbus_dev_add(pdsc, pf); + pdsc_auxbus_dev_add(pdsc, pf, PDS_DEV_TYPE_VDPA, + &pf->vfs[pdsc->vf_id].padev); } } From 9b467c5bcdb45a41d2a49fbb9ffca73d1380e99b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 25 Apr 2025 13:38:57 -0700 Subject: [PATCH 067/129] pds_core: remove write-after-free of client_id [ Upstream commit dfd76010f8e821b66116dec3c7d90dd2403d1396 ] A use-after-free error popped up in stress testing: [Mon Apr 21 21:21:33 2025] BUG: KFENCE: use-after-free write in pdsc_auxbus_dev_del+0xef/0x160 [pds_core] [Mon Apr 21 21:21:33 2025] Use-after-free write at 0x000000007013ecd1 (in kfence-#47): [Mon Apr 21 21:21:33 2025] pdsc_auxbus_dev_del+0xef/0x160 [pds_core] [Mon Apr 21 21:21:33 2025] pdsc_remove+0xc0/0x1b0 [pds_core] [Mon Apr 21 21:21:33 2025] pci_device_remove+0x24/0x70 [Mon Apr 21 21:21:33 2025] device_release_driver_internal+0x11f/0x180 [Mon Apr 21 21:21:33 2025] driver_detach+0x45/0x80 [Mon Apr 21 21:21:33 2025] bus_remove_driver+0x83/0xe0 [Mon Apr 21 21:21:33 2025] pci_unregister_driver+0x1a/0x80 The actual device uninit usually happens on a separate thread scheduled after this code runs, but there is no guarantee of order of thread execution, so this could be a problem. There's no actual need to clear the client_id at this point, so simply remove the offending code. Fixes: 10659034c622 ("pds_core: add the aux client API") Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250425203857.71547-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/auxbus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 4d3387bebe6a..889a18962270 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -186,7 +186,6 @@ void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, pds_client_unregister(pf, padev->client_id); auxiliary_device_delete(&padev->aux_dev); auxiliary_device_uninit(&padev->aux_dev); - padev->client_id = 0; *pd_ptr = NULL; mutex_unlock(&pf->config_lock); From db205b92dfe0501e5b92fb7cf00971d0e44ba3eb Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:05 -0300 Subject: [PATCH 068/129] net_sched: drr: Fix double list add in class with netem as child qdisc [ Upstream commit f99a3fbf023e20b626be4b0f042463d598050c9a ] As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of drr, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before adding to the list to cover for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_drr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 19901e77cd3b..edadb3a7bd14 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -35,6 +35,11 @@ struct drr_sched { struct Qdisc_class_hash clhash; }; +static bool cl_is_active(struct drr_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) { struct drr_sched *q = qdisc_priv(sch); @@ -336,7 +341,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; - bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -346,7 +350,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -356,7 +359,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first) { + if (!cl_is_active(cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From 6082a87af4c52f58150d40dec1716011d871ac21 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:06 -0300 Subject: [PATCH 069/129] net_sched: hfsc: Fix a UAF vulnerability in class with netem as child qdisc [ Upstream commit 141d34391abbb315d68556b7c67ad97885407547 ] As described in Gerrard's report [1], we have a UAF case when an hfsc class has a netem child qdisc. The crux of the issue is that hfsc is assuming that checking for cl->qdisc->q.qlen == 0 guarantees that it hasn't inserted the class in the vttree or eltree (which is not true for the netem duplicate case). This patch checks the n_active class variable to make sure that the code won't insert the class in the vttree or eltree twice, catering for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Reported-by: Gerrard Tai Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-3-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_hfsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 371255e62433..4be8760b8bc3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1566,7 +1566,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (first) { + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) From 72c3da7e6ceb74e74ddbb5a305a35c9fdfcac6e3 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:07 -0300 Subject: [PATCH 070/129] net_sched: ets: Fix double list add in class with netem as child qdisc [ Upstream commit 1a6d0c00fa07972384b0c308c72db091d49988b6 ] As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of ets, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-4-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_ets.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 9fd70462b41d..18269f86d21e 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = { [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, }; +static bool cl_is_active(struct ets_class *cl) +{ + return !list_empty(&cl->alist); +} + static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr, unsigned int *quantum, struct netlink_ext_ack *extack) @@ -416,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct ets_sched *q = qdisc_priv(sch); struct ets_class *cl; int err = 0; - bool first; cl = ets_classify(skb, sch, &err); if (!cl) { @@ -426,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -436,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first && !ets_class_is_strict(q, cl)) { + if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From a43783119e01849fbf2fe8855634e8989b240cb4 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:08 -0300 Subject: [PATCH 071/129] net_sched: qfq: Fix double list add in class with netem as child qdisc [ Upstream commit f139f37dcdf34b67f5bf92bc8e0f7f6b3ac63aa4 ] As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of qfq, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. This patch checks whether the class was already added to the agg->active list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-5-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_qfq.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 546c10adcacd..c7d4ee8cedff 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -202,6 +202,11 @@ struct qfq_sched { */ enum update_reason {enqueue, requeue}; +static bool cl_is_active(struct qfq_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); @@ -1217,7 +1222,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; - bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1239,7 +1243,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1255,8 +1258,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, ++sch->q.qlen; agg = cl->agg; - /* if the queue was not empty, then done here */ - if (!first) { + /* if the class is active, then done here */ + if (cl_is_active(cl)) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) From eae60cfe25d022d7f0321dba4cc23ad8e87ade48 Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Fri, 25 Apr 2025 15:26:32 -0700 Subject: [PATCH 072/129] ice: Check VF VSI Pointer Value in ice_vc_add_fdir_fltr() [ Upstream commit 425c5f266b2edeee0ce16fedd8466410cdcfcfe3 ] As mentioned in the commit baeb705fd6a7 ("ice: always check VF VSI pointer values"), we need to perform a null pointer check on the return value of ice_get_vf_vsi() before using it. Fixes: 6ebbe97a4881 ("ice: Add a per-VF limit on number of FDIR filters") Signed-off-by: Xuanqiang Luo Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250425222636.3188441-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 3ca5f44dea26..88c1acd5e8f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1824,6 +1824,11 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err_exit; + } #define ICE_VF_MAX_FDIR_FILTERS 128 if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || From ce4f77bef276e7d2eb7ab03a5d08bcbaa40710ec Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 14:48:41 +0200 Subject: [PATCH 073/129] ALSA: ump: Fix buffer overflow at UMP SysEx message conversion [ Upstream commit 56f1f30e6795b890463d9b20b11e576adf5a2f77 ] The conversion function from MIDI 1.0 to UMP packet contains an internal buffer to keep the incoming MIDI bytes, and its size is 4, as it was supposed to be the max size for a MIDI1 UMP packet data. However, the implementation overlooked that SysEx is handled in a different format, and it can be up to 6 bytes, as found in do_convert_to_ump(). It leads eventually to a buffer overflow, and may corrupt the memory when a longer SysEx message is received. The fix is simply to extend the buffer size to 6 to fit with the SysEx UMP message. Fixes: 0b5288f5fe63 ("ALSA: ump: Add legacy raw MIDI support") Reported-by: Argusee Link: https://patch.msgid.link/20250429124845.25128-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/sound/ump_convert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index d099ae27f849..682499b871ea 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -19,7 +19,7 @@ struct ump_cvt_to_ump_bank { /* context for converting from MIDI1 byte stream to UMP packet */ struct ump_cvt_to_ump { /* MIDI1 intermediate buffer */ - unsigned char buf[4]; + unsigned char buf[6]; /* up to 6 bytes for SysEx */ int len; int cmd_bytes; From 012a413b28e1a86bd8959035005ef5c085a1294e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 Apr 2025 10:18:01 -0700 Subject: [PATCH 074/129] nvme-pci: fix queue unquiesce check on slot_reset [ Upstream commit a75401227eeb827b1a162df1aa9d5b33da921c43 ] A zero return means the reset was successfully scheduled. We don't want to unquiesce the queues while the reset_work is pending, as that will just flush out requeued requests to a failed completion. Fixes: 71a5bb153be104 ("nvme: ensure disabling pairs with unquiesce") Reported-by: Dhankaran Singh Ajravat Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a763df0200ab..fdde38903ebc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3377,7 +3377,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - if (!nvme_try_sched_reset(&dev->ctrl)) + if (nvme_try_sched_reset(&dev->ctrl)) nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } From 1bbbdfc247f4733c1dd76eda851856970cc7bd81 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 25 Apr 2025 16:50:47 +0100 Subject: [PATCH 075/129] net: dlink: Correct endianness handling of led_mode [ Upstream commit e7e5ae71831c44d58627a991e603845a2fed2cab ] As it's name suggests, parse_eeprom() parses EEPROM data. This is done by reading data, 16 bits at a time as follows: for (i = 0; i < 128; i++) ((__le16 *) sromdata)[i] = cpu_to_le16(read_eeprom(np, i)); sromdata is at the same memory location as psrom. And the type of psrom is a pointer to struct t_SROM. As can be seen in the loop above, data is stored in sromdata, and thus psrom, as 16-bit little-endian values. However, the integer fields of t_SROM are host byte order integers. And in the case of led_mode this leads to a little endian value being incorrectly treated as host byte order. Looking at rio_set_led_mode, this does appear to be a bug as that code masks led_mode with 0x1, 0x2 and 0x8. Logic that would be effected by a reversed byte order. This problem would only manifest on big endian hosts. Found by inspection while investigating a sparse warning regarding the crc field of t_SROM. I believe that warning is a false positive. And although I plan to send a follow-up to use little-endian types for other the integer fields of PSROM_t I do not believe that will involve any bug fixes. Compile tested only. Fixes: c3f45d322cbd ("dl2k: Add support for IP1000A-based cards") Signed-off-by: Simon Horman Link: https://patch.msgid.link/20250425-dlink-led-mode-v1-1-6bae3c36e736@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/dl2k.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index db6615aa921b..ce46f3ac3b5a 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -352,7 +352,7 @@ parse_eeprom (struct net_device *dev) eth_hw_addr_set(dev, psrom->mac_addr); if (np->chip_id == CHIP_IP1000A) { - np->led_mode = psrom->led_mode; + np->led_mode = le16_to_cpu(psrom->led_mode); return 0; } diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index 195dc6cfd895..0e33e2eaae96 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -335,7 +335,7 @@ typedef struct t_SROM { u16 sub_system_id; /* 0x06 */ u16 pci_base_1; /* 0x08 (IP1000A only) */ u16 pci_base_2; /* 0x0a (IP1000A only) */ - u16 led_mode; /* 0x0c (IP1000A only) */ + __le16 led_mode; /* 0x0c (IP1000A only) */ u16 reserved1[9]; /* 0x0e-0x1f */ u8 mac_addr[6]; /* 0x20-0x25 */ u8 reserved2[10]; /* 0x26-0x2f */ From 2399d1a75033e9d611baf99b9ea6babad65d712f Mon Sep 17 00:00:00 2001 From: Da Xue Date: Fri, 25 Apr 2025 15:20:09 -0400 Subject: [PATCH 076/129] net: mdio: mux-meson-gxl: set reversed bit when using internal phy [ Upstream commit b23285e93bef729e67519a5209d5b7fde3b4af50 ] This bit is necessary to receive packets from the internal PHY. Without this bit set, no activity occurs on the interface. Normally u-boot sets this bit, but if u-boot is compiled without net support, the interface will be up but without any activity. If bit is set once, it will work until the IP is powered down or reset. The vendor SDK sets this bit along with the PHY_ID bits. Signed-off-by: Da Xue Fixes: 9a24e1ff4326 ("net: mdio: add amlogic gxl mdio mux support") Link: https://patch.msgid.link/20250425192009.1439508-1-da@libre.computer Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/mdio/mdio-mux-meson-gxl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c index 76188575ca1f..19153d44800a 100644 --- a/drivers/net/mdio/mdio-mux-meson-gxl.c +++ b/drivers/net/mdio/mdio-mux-meson-gxl.c @@ -17,6 +17,7 @@ #define REG2_LEDACT GENMASK(23, 22) #define REG2_LEDLINK GENMASK(25, 24) #define REG2_DIV4SEL BIT(27) +#define REG2_REVERSED BIT(28) #define REG2_ADCBYPASS BIT(30) #define REG2_CLKINSEL BIT(31) #define ETH_REG3 0x4 @@ -65,7 +66,7 @@ static void gxl_enable_internal_mdio(struct gxl_mdio_mux *priv) * The only constraint is that it must match the one in * drivers/net/phy/meson-gxl.c to properly match the PHY. */ - writel(FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), + writel(REG2_REVERSED | FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), priv->regs + ETH_REG2); /* Enable the internal phy */ From 683ad6d7f4899ac9698323f2d3503e5ddfc58868 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Apr 2025 14:03:09 -0700 Subject: [PATCH 077/129] igc: fix lock order in igc_ptp_reset [ Upstream commit c7d6cb96d5c33b5148f3dc76fcd30a9b8cd9e973 ] Commit 1a931c4f5e68 ("igc: add lock preventing multiple simultaneous PTM transactions") added a new mutex to protect concurrent PTM transactions. This lock is acquired in igc_ptp_reset() in order to ensure the PTM registers are properly disabled after a device reset. The flow where the lock is acquired already holds a spinlock, so acquiring a mutex leads to a sleep-while-locking bug, reported both by smatch, and the kernel test robot. The critical section in igc_ptp_reset() does correctly use the readx_poll_timeout_atomic variants, but the standard PTM flow uses regular sleeping variants. This makes converting the mutex to a spinlock a bit tricky. Instead, re-order the locking in igc_ptp_reset. Acquire the mutex first, and then the tmreg_lock spinlock. This is safe because there is no other ordering dependency on these locks, as this is the only place where both locks were acquired simultaneously. Indeed, any other flow acquiring locks in that order would be wrong regardless. Signed-off-by: Jacob Keller Fixes: 1a931c4f5e68 ("igc: add lock preventing multiple simultaneous PTM transactions") Link: https://lore.kernel.org/intel-wired-lan/Z_-P-Hc1yxcw0lTB@stanley.mountain/ Link: https://lore.kernel.org/intel-wired-lan/202504211511.f7738f5d-lkp@intel.com/T/#u Reviewed-by: Przemek Kitszel Reviewed-by: Vitaly Lifshits Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index b6bb01a486d9..a82af96e6bd1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1237,6 +1237,8 @@ void igc_ptp_reset(struct igc_adapter *adapter) /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + mutex_lock(&adapter->ptm_lock); + spin_lock_irqsave(&adapter->tmreg_lock, flags); switch (adapter->hw.mac.type) { @@ -1255,7 +1257,6 @@ void igc_ptp_reset(struct igc_adapter *adapter) if (!igc_is_crosststamp_supported(adapter)) break; - mutex_lock(&adapter->ptm_lock); wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); @@ -1279,7 +1280,6 @@ void igc_ptp_reset(struct igc_adapter *adapter) netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); igc_ptm_reset(hw); - mutex_unlock(&adapter->ptm_lock); break; default: /* No work to do. */ @@ -1296,5 +1296,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptm_lock); + wrfl(); } From cb625f783f70dc6614f03612b8e64ad99cb0a13c Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Sun, 27 Apr 2025 02:05:44 +0100 Subject: [PATCH 078/129] net: ethernet: mtk_eth_soc: fix SER panic with 4GB+ RAM [ Upstream commit 6e0490fc36cdac696f96e57b61d93b9ae32e0f4c ] If the mtk_poll_rx() function detects the MTK_RESETTING flag, it will jump to release_desc and refill the high word of the SDP on the 4GB RFB. Subsequently, mtk_rx_clean will process an incorrect SDP, leading to a panic. Add patch from MediaTek's SDK to resolve this. Fixes: 2d75891ebc09 ("net: ethernet: mtk_eth_soc: support 36-bit DMA addressing on MT7988") Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/71f47ea785699c6aa3b922d66c2bdc1a43da25b1 Signed-off-by: Chad Monroe Link: https://patch.msgid.link/4adc2aaeb0fb1b9cdc56bf21cf8e7fa328daa345.1745715843.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index dc89dbc13b25..d2ec8f642c2f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2180,14 +2180,18 @@ skip_rx: ring->data[idx] = new_data; rxd->rxd1 = (unsigned int)dma_addr; release_desc: + if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) + addr64 = FIELD_GET(RX_DMA_ADDR64_MASK, + rxd->rxd2); + else + addr64 = RX_DMA_PREP_ADDR64(dma_addr); + } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) rxd->rxd2 = RX_DMA_LSO; else - rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size); - - if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA) && - likely(dma_addr != DMA_MAPPING_ERROR)) - rxd->rxd2 |= RX_DMA_PREP_ADDR64(dma_addr); + rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size) | addr64; ring->calc_idx = idx; done++; From 8acf08b1e4ec9ed416b80df55276ef51e6a3fbaf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:55 +0300 Subject: [PATCH 079/129] net: dsa: felix: fix broken taprio gate states after clock jump [ Upstream commit 426d487bca38b34f39c483edfc6313a036446b33 ] Simplest setup to reproduce the issue: connect 2 ports of the LS1028A-RDB together (eno0 with swp0) and run: $ ip link set eno0 up && ip link set swp0 up $ tc qdisc replace dev swp0 parent root handle 100 taprio num_tc 8 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ sched-entry S 20 300000 sched-entry S 48 200000 \ sched-entry S 20 300000 sched-entry S 83 200000 \ sched-entry S 40 300000 sched-entry S 00 200000 flags 2 $ ptp4l -i eno0 -f /etc/linuxptp/configs/gPTP.cfg -m & $ ptp4l -i swp0 -f /etc/linuxptp/configs/gPTP.cfg -m One will observe that the PTP state machine on swp0 starts synchronizing, then it attempts to do a clock step, and after that, it never fails to recover from the condition below. ptp4l[82.427]: selected best master clock 00049f.fffe.05f627 ptp4l[82.428]: port 1 (swp0): MASTER to UNCALIBRATED on RS_SLAVE ptp4l[83.252]: port 1 (swp0): UNCALIBRATED to SLAVE on MASTER_CLOCK_SELECTED ptp4l[83.886]: rms 4537731277 max 9075462553 freq -18518 +/- 11467 delay 818 +/- 0 ptp4l[84.170]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[85.304]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[85.305]: port 1 (swp0): send peer delay response failed ptp4l[85.306]: port 1 (swp0): clearing fault immediately ptp4l[86.304]: timed out while polling for tx timestamp A hint is given by the non-zero statistics for dropped packets which were expecting hardware TX timestamps: $ ethtool --include-statistics -T swp0 (...) Statistics: tx_pkts: 30 tx_lost: 11 tx_err: 0 We know that when PTP clock stepping takes place (from ocelot_ptp_settime64() or from ocelot_ptp_adjtime()), vsc9959_tas_clock_adjust() is called. Another interesting hint is that placing an early return in vsc9959_tas_clock_adjust(), so as to neutralize this function, fixes the issue and TX timestamps are no longer dropped. The debugging function written by me and included below is intended to read the GCL RAM, after the admin schedule became operational, through the two status registers available for this purpose: QSYS_GCL_STATUS_REG_1 and QSYS_GCL_STATUS_REG_2. static void vsc9959_print_tas_gcl(struct ocelot *ocelot) { u32 val, list_length, interval, gate_state; int i, err; err = read_poll_timeout(ocelot_read, val, !(val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING), 10, 100000, false, ocelot, QSYS_PARAM_STATUS_REG_8); if (err) { dev_err(ocelot->dev, "Failed to wait for TAS config pending bit to clear: %pe\n", ERR_PTR(err)); return; } val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_3); list_length = QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(val); dev_info(ocelot->dev, "GCL length: %u\n", list_length); for (i = 0; i < list_length; i++) { ocelot_rmw(ocelot, QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(i), QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M, QSYS_GCL_STATUS_REG_1); interval = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_2); val = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_1); gate_state = QSYS_GCL_STATUS_REG_1_GATE_STATE_X(val); dev_info(ocelot->dev, "GCL entry %d: states 0x%x interval %u\n", i, gate_state, interval); } } Calling it from two places: after the initial QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE performed by vsc9959_qos_port_tas_set(), and after the one done by vsc9959_tas_clock_adjust(), I notice the following difference. From the tc-taprio process context, where the schedule was initially configured, the GCL looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 1: states 0x10 interval 200000 mscc_felix 0000:00:00.5: GCL entry 2: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 3: states 0x48 interval 200000 mscc_felix 0000:00:00.5: GCL entry 4: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 5: states 0x83 interval 200000 mscc_felix 0000:00:00.5: GCL entry 6: states 0x40 interval 300000 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 200000 But from the ptp4l clock stepping process context, when the vsc9959_tas_clock_adjust() hook is called, the GCL RAM of the operational schedule now looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 1: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 2: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 3: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 4: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 5: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 6: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 0 I do not have a formal explanation, just experimental conclusions. It appears that after triggering QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE for a port's TAS, the GCL entry RAM is updated anyway, despite what the documentation claims: "Specify the time interval in QSYS::GCL_CFG_REG_2.TIME_INTERVAL. This triggers the actual RAM write with the gate state and the time interval for the entry number specified". We don't touch that register (through vsc9959_tas_gcl_set()) from vsc9959_tas_clock_adjust(), yet the GCL RAM is updated anyway. It seems to be updated with effectively stale memory, which in my testing can hold a variety of things, including even pieces of the previously applied schedule, for particular schedule lengths. As such, in most circumstances it is very difficult to pinpoint this issue, because the newly updated schedule would "behave strangely", but ultimately might still pass traffic to some extent, due to some gate entries still being present in the stale GCL entry RAM. It is easy to miss. With the particular schedule given at the beginning, the GCL RAM "happens" to be reproducibly rewritten with all zeroes, and this is consistent with what we see: when the time-aware shaper has gate entries with all gates closed, traffic is dropped on TX, no wonder we can't retrieve TX timestamps. Rewriting the GCL entry RAM when reapplying the new base time fixes the observed issue. Fixes: 8670dc33f48b ("net: dsa: felix: update base time of time-aware shaper when adjusting PTP time") Reported-by: Richie Pearn Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/ocelot/felix_vsc9959.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 8d27933c3733..f91f25578f07 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1543,7 +1543,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) struct tc_taprio_qopt_offload *taprio; struct ocelot_port *ocelot_port; struct timespec64 base_ts; - int port; + int i, port; u32 val; mutex_lock(&ocelot->fwd_domain_lock); @@ -1575,6 +1575,9 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, QSYS_PARAM_CFG_REG_3); + for (i = 0; i < taprio->num_entries; i++) + vsc9959_tas_gcl_set(ocelot, i, &taprio->entries[i]); + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL); From e0272844995aff8cbf2cc88b78c2b35ca51eda43 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 26 Apr 2025 17:32:09 +0200 Subject: [PATCH 080/129] net: ipv6: fix UDPv6 GSO segmentation with NAT [ Upstream commit b936a9b8d4a585ccb6d454921c36286bfe63e01d ] If any address or port is changed, update it in all packets and recalculate checksum. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250426153210.14044-1-nbd@nbd.name Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/udp_offload.c | 61 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 2ab16139c197..132cfc3b2c84 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -247,6 +247,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) return segs; } +static void __udpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, + __be16 *oldport, __be16 newport) +{ + struct udphdr *uh = udp_hdr(seg); + + if (ipv6_addr_equal(oldip, newip) && *oldport == newport) + return; + + if (uh->check) { + inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32, + newip->s6_addr32, true); + + inet_proto_csum_replace2(&uh->check, seg, *oldport, newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + + *oldip = *newip; + *oldport = newport; +} + +static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct ipv6hdr *iph; + const struct udphdr *uh; + struct ipv6hdr *iph2; + struct sk_buff *seg; + struct udphdr *uh2; + + seg = segs; + uh = udp_hdr(seg); + iph = ipv6_hdr(seg); + uh2 = udp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ipv6_hdr(seg); + + __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &uh2->source, uh->source); + __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &uh2->dest, uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) @@ -259,7 +315,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); + if (is_ipv6) + return __udpv6_gso_segment_list_csum(skb); + else + return __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, From 3facfd720055fcbf256ecdefc592fecd1f84601e Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:01 -0700 Subject: [PATCH 081/129] bnxt_en: Fix coredump logic to free allocated buffer [ Upstream commit ea9376cf68230e05492f22ca45d329f16e262c7b ] When handling HWRM_DBG_COREDUMP_LIST FW command in bnxt_hwrm_dbg_dma_data(), the allocated buffer info->dest_buf is not freed in the error path. In the normal path, info->dest_buf is assigned to coredump->data and it will eventually be freed after the coredump is collected. Free info->dest_buf immediately inside bnxt_hwrm_dbg_dma_data() in the error path. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reported-by: Michael Chan Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index c06789882036..b57d2a25ae27 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -72,6 +72,11 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, memcpy(info->dest_buf + off, dma_buf, len); } else { rc = -ENOBUFS; + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { + kfree(info->dest_buf); + info->dest_buf = NULL; + } break; } } From 4d69864915a3a052538e4ba76cd6fd77cfc64ebe Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:02 -0700 Subject: [PATCH 082/129] bnxt_en: Fix out-of-bound memcpy() during ethtool -w [ Upstream commit 6b87bd94f34370bbf1dfa59352bed8efab5bf419 ] When retrieving the FW coredump using ethtool, it can sometimes cause memory corruption: BUG: KFENCE: memory corruption in __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] Corrupted memory at 0x000000008f0f30e8 [ ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ] (in kfence-#45): __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] ethtool_get_dump_data+0xdc/0x1a0 __dev_ethtool+0xa1e/0x1af0 dev_ethtool+0xa8/0x170 dev_ioctl+0x1b5/0x580 sock_do_ioctl+0xab/0xf0 sock_ioctl+0x1ce/0x2e0 __x64_sys_ioctl+0x87/0xc0 do_syscall_64+0x5c/0xf0 entry_SYSCALL_64_after_hwframe+0x78/0x80 ... This happens when copying the coredump segment list in bnxt_hwrm_dbg_dma_data() with the HWRM_DBG_COREDUMP_LIST FW command. The info->dest_buf buffer is allocated based on the number of coredump segments returned by the FW. The segment list is then DMA'ed by the FW and the length of the DMA is returned by FW. The driver then copies this DMA'ed segment list to info->dest_buf. In some cases, this DMA length may exceed the info->dest_buf length and cause the above BUG condition. Fix it by capping the copy length to not exceed the length of info->dest_buf. The extra DMA data contains no useful information. This code path is shared for the HWRM_DBG_COREDUMP_LIST and the HWRM_DBG_COREDUMP_RETRIEVE FW commands. The buffering is different for these 2 FW commands. To simplify the logic, we need to move the line to adjust the buffer length for HWRM_DBG_COREDUMP_RETRIEVE up, so that the new check to cap the copy length will work for both commands. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnxt/bnxt_coredump.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index b57d2a25ae27..32813cdd5aa5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -66,10 +66,19 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) + info->dest_buf_size += len; + if (info->dest_buf) { if ((info->seg_start + off + len) <= BNXT_COREDUMP_BUF_LEN(info->buf_len)) { - memcpy(info->dest_buf + off, dma_buf, len); + u16 copylen = min_t(u16, len, + info->dest_buf_size - off); + + memcpy(info->dest_buf + off, dma_buf, copylen); + if (copylen < len) + break; } else { rc = -ENOBUFS; if (cmn_req->req_type == @@ -81,10 +90,6 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } - if (cmn_req->req_type == - cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) - info->dest_buf_size += len; - if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) break; From 252a5a67adc7d6763c558d97e21850a33fe44ba1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Apr 2025 15:59:03 -0700 Subject: [PATCH 083/129] bnxt_en: Fix ethtool -d byte order for 32-bit values [ Upstream commit 02e8be5a032cae0f4ca33c6053c44d83cf4acc93 ] For version 1 register dump that includes the PCIe stats, the existing code incorrectly assumes that all PCIe stats are 64-bit values. Fix it by using an array containing the starting and ending index of the 32-bit values. The loop in bnxt_get_regs() will use the array to do proper endian swap for the 32-bit values. Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'") Reviewed-by: Shruti Parab Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2e7ddbca9d53..dcedafa4d2e1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1393,6 +1393,17 @@ static int bnxt_get_regs_len(struct net_device *dev) return reg_len; } +#define BNXT_PCIE_32B_ENTRY(start, end) \ + { offsetof(struct pcie_ctx_hw_stats, start), \ + offsetof(struct pcie_ctx_hw_stats, end) } + +static const struct { + u16 start; + u16 end; +} bnxt_pcie_32b_entries[] = { + BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]), +}; + static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { @@ -1424,12 +1435,27 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); rc = hwrm_req_send(bp, req); if (!rc) { - __le64 *src = (__le64 *)hw_pcie_stats; - u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN); - int i; + u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); + u8 *src = (u8 *)hw_pcie_stats; + int i, j; - for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++) - dst[i] = le64_to_cpu(src[i]); + for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + if (i >= bnxt_pcie_32b_entries[j].start && + i <= bnxt_pcie_32b_entries[j].end) { + u32 *dst32 = (u32 *)(dst + i); + + *dst32 = le32_to_cpu(*(__le32 *)(src + i)); + i += 4; + if (i > bnxt_pcie_32b_entries[j].end && + j < ARRAY_SIZE(bnxt_pcie_32b_entries) - 1) + j++; + } else { + u64 *dst64 = (u64 *)(dst + i); + + *dst64 = le64_to_cpu(*(__le64 *)(src + i)); + i += 8; + } + } } hwrm_req_drop(bp, req); } From ec00ea56450e22040f93033b8769e0db7ed554a2 Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Tue, 29 Apr 2025 10:42:01 -0600 Subject: [PATCH 084/129] nvme-tcp: fix premature queue removal and I/O failover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 77e40bbce93059658aee02786a32c5c98a240a8a ] This patch addresses a data corruption issue observed in nvme-tcp during testing. In an NVMe native multipath setup, when an I/O timeout occurs, all inflight I/Os are canceled almost immediately after the kernel socket is shut down. These canceled I/Os are reported as host path errors, triggering a failover that succeeds on a different path. However, at this point, the original I/O may still be outstanding in the host's network transmission path (e.g., the NIC’s TX queue). From the user-space app's perspective, the buffer associated with the I/O is considered completed since they're acked on the different path and may be reused for new I/O requests. Because nvme-tcp enables zero-copy by default in the transmission path, this can lead to corrupted data being sent to the original target, ultimately causing data corruption. We can reproduce this data corruption by injecting delay on one path and triggering i/o timeout. To prevent this issue, this change ensures that all inflight transmissions are fully completed from host's perspective before returning from queue stop. To handle concurrent I/O timeout from multiple namespaces under the same controller, always wait in queue stop regardless of queue's state. This aligns with the behavior of queue stopping in other NVMe fabric transports. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Michael Liang Reviewed-by: Mohamed Khalfella Reviewed-by: Randy Jennings Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 84db7f4f861c..5b76670f34be 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1710,7 +1710,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1724,6 +1724,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -1790,7 +1815,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, From dae1ce27ceaea7e1522025b15252e3cc52802622 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Tue, 29 Apr 2025 10:55:27 +0530 Subject: [PATCH 085/129] net: lan743x: Fix memleak issue when GSO enabled [ Upstream commit 2d52e2e38b85c8b7bc00dca55c2499f46f8c8198 ] Always map the `skb` to the LS descriptor. Previously skb was mapped to EXT descriptor when the number of fragments is zero with GSO enabled. Mapping the skb to EXT descriptor prevents it from being freed, leading to a memory leak Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Thangaraj Samynathan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250429052527.10031-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 8 ++++++-- drivers/net/ethernet/microchip/lan743x_main.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 92010bfe5e41..5d2ceff72784 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1949,6 +1949,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, if (nr_frags <= 0) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_first; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); @@ -2018,6 +2019,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, tx->frame_first = 0; tx->frame_data0 = 0; tx->frame_tail = 0; + tx->frame_last = 0; return -ENOMEM; } @@ -2058,16 +2060,18 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, TX_DESC_DATA0_DTYPE_DATA_) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_tail; } - tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - buffer_info = &tx->buffer_info[tx->frame_tail]; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_last]; + buffer_info = &tx->buffer_info[tx->frame_last]; buffer_info->skb = skb; if (time_stamp) buffer_info->flags |= TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED; if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 3b2c6046eb3a..b6c83c68241e 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -974,6 +974,7 @@ struct lan743x_tx { u32 frame_first; u32 frame_data0; u32 frame_tail; + u32 frame_last; struct lan743x_tx_buffer_info *buffer_info; From 0e32cbcbc4cb415e6d3967205c9ee88dc8454b63 Mon Sep 17 00:00:00 2001 From: Mattias Barthel Date: Tue, 29 Apr 2025 11:08:26 +0200 Subject: [PATCH 086/129] net: fec: ERR007885 Workaround for conventional TX [ Upstream commit a179aad12badc43201cbf45d1e8ed2c1383c76b9 ] Activate TX hang workaround also in fec_enet_txq_submit_skb() when TSO is not enabled. Errata: ERR007885 Symptoms: NETDEV WATCHDOG: eth0 (fec): transmit queue 0 timed out commit 37d6017b84f7 ("net: fec: Workaround for imx6sx enet tx hang when enable three queues") There is a TDAR race condition for mutliQ when the software sets TDAR and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles). This will cause the udma_tx and udma_tx_arbiter state machines to hang. So, the Workaround is checking TDAR status four time, if TDAR cleared by hardware and then write TDAR, otherwise don't set TDAR. Fixes: 53bb20d1faba ("net: fec: add variable reg_desc_active to speed things up") Signed-off-by: Mattias Barthel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250429090826.3101258-1-mattiasbarthel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2d6b50903c92..7261838a09db 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -695,7 +695,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } From 7e1ca1bed3f66e00377f7d2147be390144924276 Mon Sep 17 00:00:00 2001 From: Sathesh B Edara Date: Tue, 29 Apr 2025 04:46:24 -0700 Subject: [PATCH 087/129] octeon_ep: Fix host hang issue during device reboot [ Upstream commit 34f42736b325287a7b2ce37e415838f539767bda ] When the host loses heartbeat messages from the device, the driver calls the device-specific ndo_stop function, which frees the resources. If the driver is unloaded in this scenario, it calls ndo_stop again, attempting to free resources that have already been freed, leading to a host hang issue. To resolve this, dev_close should be called instead of the device-specific stop function.dev_close internally calls ndo_stop to stop the network interface and performs additional cleanup tasks. During the driver unload process, if the device is already down, ndo_stop is not called. Fixes: 5cb96c29aa0e ("octeon_ep: add heartbeat monitor") Signed-off-by: Sathesh B Edara Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250429114624.19104-1-sedara@marvell.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 6f1fe7e283d4..7a30095b3486 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -917,7 +917,7 @@ static void octep_hb_timeout_task(struct work_struct *work) miss_cnt); rtnl_lock(); if (netif_running(oct->netdev)) - octep_stop(oct->netdev); + dev_close(oct->netdev); rtnl_unlock(); } From 67d587bfa349c25a4f0f2ab1a7911b210331d197 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:49 +0800 Subject: [PATCH 088/129] net: hns3: store rx VLAN tag offload state for VF [ Upstream commit ef2383d078edcbe3055032436b16cdf206f26de2 ] The VF driver missed to store the rx VLAN tag strip state when user change the rx VLAN tag offload state. And it will default to enable the rx vlan tag strip when re-init VF device after reset. So if user disable rx VLAN tag offload, and trig reset, then the HW will still strip the VLAN tag from packet nad fill into RX BD, but the VF driver will ignore it for rx VLAN tag offload disabled. It may cause the rx VLAN tag dropped. Fixes: b2641e2ad456 ("net: hns3: Add support of hardware rx-vlan-offload to HNS3 VF driver") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250430093052.2400464-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 25 ++++++++++++++----- .../hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 69bfcfb148de..1ba0b57c7a72 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1257,9 +1257,8 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) rtnl_unlock(); } -static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +static int hclgevf_en_hw_strip_rxvtag_cmd(struct hclgevf_dev *hdev, bool enable) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1268,6 +1267,19 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } +static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + + ret = hclgevf_en_hw_strip_rxvtag_cmd(hdev, enable); + if (ret) + return ret; + + hdev->rxvtag_strip_en = enable; + return 0; +} + static int hclgevf_reset_tqp(struct hnae3_handle *handle) { #define HCLGEVF_RESET_ALL_QUEUE_DONE 1U @@ -2143,12 +2155,13 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) tc_valid, tc_size); } -static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) +static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev, + bool rxvtag_strip_en) { struct hnae3_handle *nic = &hdev->nic; int ret; - ret = hclgevf_en_hw_strip_rxvtag(nic, true); + ret = hclgevf_en_hw_strip_rxvtag(nic, rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed to enable rx vlan offload, ret = %d\n", ret); @@ -2815,7 +2828,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, hdev->rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); @@ -2928,7 +2941,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; } - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, true); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index cccef3228461..0208425ab594 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -253,6 +253,7 @@ struct hclgevf_dev { int *vector_irq; bool gro_en; + bool rxvtag_strip_en; unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; From c1b1d3b0d8e4cc9ac172e95db2cb89bac412ddce Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 30 Apr 2025 17:30:50 +0800 Subject: [PATCH 089/129] net: hns3: fix an interrupt residual problem [ Upstream commit 8e6b9c6ea5a55045eed6526d8ee49e93192d1a58 ] When a VF is passthrough to a VM, and the VM is killed, the reported interrupt may not been handled, it will remain, and won't be clear by the nic engine even with a flr or tqp reset. When the VM restart, the interrupt of the first vector may be dropped by the second enable_irq in vfio, see the issue below: https://gitlab.com/qemu-project/qemu/-/issues/2884#note_2423361621 We notice that the vfio has always behaved this way, and the interrupt is a residue of the nic engine, so we fix the problem by moving the vector enable process out of the enable_irq loop. Fixes: 08a100689d4b ("net: hns3: re-organize vector handle") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 82 +++++++++---------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 801801e8803e..0ed01f4d6806 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -473,20 +473,14 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, writel(mask_en, tqp_vector->mask_addr); } -static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); enable_irq(tqp_vector->vector_irq); - - /* enable vector */ - hns3_mask_vector_irq(tqp_vector, 1); } -static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_disable(struct hns3_enet_tqp_vector *tqp_vector) { - /* disable vector */ - hns3_mask_vector_irq(tqp_vector, 0); - disable_irq(tqp_vector->vector_irq); napi_disable(&tqp_vector->napi); cancel_work_sync(&tqp_vector->rx_group.dim.work); @@ -707,11 +701,42 @@ static int hns3_set_rx_cpu_rmap(struct net_device *netdev) return 0; } +static void hns3_enable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_enable(&priv->tqp_vector[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 1); + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_enable(h->kinfo.tqp[i]); +} + +static void hns3_disable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_disable(h->kinfo.tqp[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 0); + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_disable(&priv->tqp_vector[i]); +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; - int i, j; int ret; ret = hns3_nic_reset_all_ring(h); @@ -720,23 +745,13 @@ static int hns3_nic_net_up(struct net_device *netdev) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - /* enable the vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - /* enable rcb */ - for (j = 0; j < h->kinfo.num_tqps; j++) - hns3_tqp_enable(h->kinfo.tqp[j]); + hns3_enable_irqs_and_tqps(netdev); /* start the ae_dev */ ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0; if (ret) { set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); - - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + hns3_disable_irqs_and_tqps(netdev); } return ret; @@ -823,17 +838,9 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h) static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops; - int i; - /* disable vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - /* disable rcb */ - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(netdev); /* stop ae_dev */ ops = priv->ae_handle->ae_algo->ops; @@ -5870,8 +5877,6 @@ int hns3_set_channels(struct net_device *netdev, void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5882,11 +5887,7 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) netif_carrier_off(ndev); netif_tx_disable(ndev); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(ndev); /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able @@ -5902,7 +5903,6 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5918,11 +5918,7 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_enable(h->kinfo.tqp[i]); + hns3_enable_irqs_and_tqps(ndev); netif_tx_wake_all_queues(ndev); From accb5a8ef12974a7969023354803c38981e07068 Mon Sep 17 00:00:00 2001 From: Hao Lan Date: Wed, 30 Apr 2025 17:30:51 +0800 Subject: [PATCH 090/129] net: hns3: fixed debugfs tm_qset size [ Upstream commit e317aebeefcb3b0c71f2305af3c22871ca6b3833 ] The size of the tm_qset file of debugfs is limited to 64 KB, which is too small in the scenario with 1280 qsets. The size needs to be expanded to 1 MB. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Hao Lan Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 4f385a18d288..36206273453f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -60,7 +60,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tm_qset", .cmd = HNAE3_DBG_CMD_TM_QSET, .dentry = HNS3_DBG_DENTRY_TM, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { From cb5922f74b4043d7e9239a662d0dde2f954d5403 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:52 +0800 Subject: [PATCH 091/129] net: hns3: defer calling ptp_clock_register() [ Upstream commit 4971394d9d624f91689d766f31ce668d169d9959 ] Currently the ptp_clock_register() is called before relative ptp resource ready. It may cause unexpected result when upper layer called the ptp API during the timewindow. Fix it by moving the ptp_clock_register() to the function end. Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250430093052.2400464-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index ddc691424c81..9a806ac727cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -440,6 +440,13 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) ptp->info.settime64 = hclge_ptp_settime; ptp->info.n_alarm = 0; + + spin_lock_init(&ptp->lock); + ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; + ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; + ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; + hdev->ptp = ptp; + ptp->clock = ptp_clock_register(&ptp->info, &hdev->pdev->dev); if (IS_ERR(ptp->clock)) { dev_err(&hdev->pdev->dev, @@ -451,12 +458,6 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) return -ENODEV; } - spin_lock_init(&ptp->lock); - ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; - ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; - ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; - hdev->ptp = ptp; - return 0; } From e1301580bf0bf85769143da46a0668c36044d6bf Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:40 +0200 Subject: [PATCH 092/129] net: vertexcom: mse102x: Fix possible stuck of SPI interrupt [ Upstream commit 55f362885951b2d00fd7fbb02ef0227deea572c2 ] The MSE102x doesn't provide any SPI commands for interrupt handling. So in case the interrupt fired before the driver requests the IRQ, the interrupt will never fire again. In order to fix this always poll for pending packets after opening the interface. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 8f67c39f479e..45f4d2cb5b31 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -509,6 +509,7 @@ static irqreturn_t mse102x_irq(int irq, void *_mse) static int mse102x_net_open(struct net_device *ndev) { struct mse102x_net *mse = netdev_priv(ndev); + struct mse102x_net_spi *mses = to_mse102x_spi(mse); int ret; ret = request_threaded_irq(ndev->irq, NULL, mse102x_irq, IRQF_ONESHOT, @@ -524,6 +525,13 @@ static int mse102x_net_open(struct net_device *ndev) netif_carrier_on(ndev); + /* The SPI interrupt can stuck in case of pending packet(s). + * So poll for possible packet(s) to re-arm the interrupt. + */ + mutex_lock(&mses->lock); + mse102x_rx_pkt_spi(mse); + mutex_unlock(&mses->lock); + netif_dbg(mse, ifup, ndev, "network device up\n"); return 0; From 04d9c34079862fb88d1f2ee38e4d6fa73368594a Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:41 +0200 Subject: [PATCH 093/129] net: vertexcom: mse102x: Fix LEN_MASK [ Upstream commit 74987089ec678b4018dba0a609e9f4bf6ef7f4ad ] The LEN_MASK for CMD_RTS doesn't cover the whole parameter mask. The Bit 11 is reserved, so adjust LEN_MASK accordingly. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 45f4d2cb5b31..55f34d9d111c 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -33,7 +33,7 @@ #define CMD_CTR (0x2 << CMD_SHIFT) #define CMD_MASK GENMASK(15, CMD_SHIFT) -#define LEN_MASK GENMASK(CMD_SHIFT - 1, 0) +#define LEN_MASK GENMASK(CMD_SHIFT - 2, 0) #define DET_CMD_LEN 4 #define DET_SOF_LEN 2 From 6ac8ec92f53f602d639dfa7eef7180b0ab60c7e0 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:42 +0200 Subject: [PATCH 094/129] net: vertexcom: mse102x: Add range check for CMD_RTS [ Upstream commit d4dda902dac194e3231a1ed0f76c6c3b6340ba8a ] Since there is no protection in the SPI protocol against electrical interferences, the driver shouldn't blindly trust the length payload of CMD_RTS. So introduce a bounds check for incoming frames. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 55f34d9d111c..2b1aac72601d 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -337,8 +338,9 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) } rxlen = cmd_resp & LEN_MASK; - if (!rxlen) { - net_dbg_ratelimited("%s: No frame length defined\n", __func__); + if (rxlen < ETH_ZLEN || rxlen > VLAN_ETH_FRAME_LEN) { + net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, + rxlen); mse->stats.invalid_len++; return; } From 29ea1abc7d4fa635504128e846f008ffe692612e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:43 +0200 Subject: [PATCH 095/129] net: vertexcom: mse102x: Fix RX error handling [ Upstream commit ee512922ddd7d64afe2b28830a88f19063217649 ] In case the CMD_RTS got corrupted by interferences, the MSE102x doesn't allow a retransmission of the command. Instead the Ethernet frame must be shifted out of the SPI FIFO. Since the actual length is unknown, assume the maximum possible value. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-5-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 2b1aac72601d..060a566bc6aa 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -263,7 +263,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, } static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, - unsigned int frame_len) + unsigned int frame_len, bool drop) { struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; @@ -281,6 +281,9 @@ static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, netdev_err(mse->ndev, "%s: spi_sync() failed: %d\n", __func__, ret); mse->stats.xfer_err++; + } else if (drop) { + netdev_dbg(mse->ndev, "%s: Drop frame\n", __func__); + ret = -EINVAL; } else if (*sof != cpu_to_be16(DET_SOF)) { netdev_dbg(mse->ndev, "%s: SPI start of frame is invalid (0x%04x)\n", __func__, *sof); @@ -308,6 +311,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) struct sk_buff *skb; unsigned int rxalign; unsigned int rxlen; + bool drop = false; __be16 rx = 0; u16 cmd_resp; u8 *rxpkt; @@ -330,7 +334,8 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", __func__, cmd_resp); mse->stats.invalid_rts++; - return; + drop = true; + goto drop; } net_dbg_ratelimited("%s: Unexpected response to first CMD\n", @@ -342,9 +347,16 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, rxlen); mse->stats.invalid_len++; - return; + drop = true; } + /* In case of a invalid CMD_RTS, the frame must be consumed anyway. + * So assume the maximum possible frame length. + */ +drop: + if (drop) + rxlen = VLAN_ETH_FRAME_LEN; + rxalign = ALIGN(rxlen + DET_SOF_LEN + DET_DFT_LEN, 4); skb = netdev_alloc_skb_ip_align(mse->ndev, rxalign); if (!skb) @@ -355,7 +367,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) * They are copied, but ignored. */ rxpkt = skb_put(skb, rxlen) - DET_SOF_LEN; - if (mse102x_rx_frame_spi(mse, rxpkt, rxlen)) { + if (mse102x_rx_frame_spi(mse, rxpkt, rxlen, drop)) { mse->ndev->stats.rx_errors++; dev_kfree_skb(skb); return; From 54583776e416e76c0083300460b840a719d92caf Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 13:12:58 -0600 Subject: [PATCH 096/129] ASoC: Use of_property_read_bool() commit 69dd15a8ef0ae494179fd15023aa8172188db6b7 upstream. Use of_property_read_bool() to read boolean properties rather than of_get_property(). This is part of a larger effort to remove callers of of_get_property() and similar functions. of_get_property() leaks the DT property data pointer which is a problem for dynamically allocated nodes which may be freed. Signed-off-by: Rob Herring (Arm) Link: https://patch.msgid.link/20240731191312.1710417-20-robh@kernel.org Signed-off-by: Mark Brown Cc: Christophe Leroy Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/ak4613.c | 4 ++-- sound/soc/soc-core.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index 619a817ee91c..4c1318ce8ae5 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -840,14 +840,14 @@ static void ak4613_parse_of(struct ak4613_priv *priv, /* Input 1 - 2 */ for (i = 0; i < 2; i++) { snprintf(prop, sizeof(prop), "asahi-kasei,in%d-single-end", i + 1); - if (!of_get_property(np, prop, NULL)) + if (!of_property_read_bool(np, prop)) priv->ic |= 1 << i; } /* Output 1 - 6 */ for (i = 0; i < 6; i++) { snprintf(prop, sizeof(prop), "asahi-kasei,out%d-single-end", i + 1); - if (!of_get_property(np, prop, NULL)) + if (!of_property_read_bool(np, prop)) priv->oc |= 1 << i; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index e65fe3a7c3e4..b9dfc10a9fc0 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3249,10 +3249,10 @@ unsigned int snd_soc_daifmt_parse_format(struct device_node *np, * SND_SOC_DAIFMT_INV_MASK area */ snprintf(prop, sizeof(prop), "%sbitclock-inversion", prefix); - bit = !!of_get_property(np, prop, NULL); + bit = of_property_read_bool(np, prop); snprintf(prop, sizeof(prop), "%sframe-inversion", prefix); - frame = !!of_get_property(np, prop, NULL); + frame = of_property_read_bool(np, prop); switch ((bit << 4) + frame) { case 0x11: @@ -3289,12 +3289,12 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, * check "[prefix]frame-master" */ snprintf(prop, sizeof(prop), "%sbitclock-master", prefix); - bit = !!of_get_property(np, prop, NULL); + bit = of_property_read_bool(np, prop); if (bit && bitclkmaster) *bitclkmaster = of_parse_phandle(np, prop, 0); snprintf(prop, sizeof(prop), "%sframe-master", prefix); - frame = !!of_get_property(np, prop, NULL); + frame = of_property_read_bool(np, prop); if (frame && framemaster) *framemaster = of_parse_phandle(np, prop, 0); From 85484bc611385317bab84d16d8462f82c0390049 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Jan 2025 09:21:27 +0100 Subject: [PATCH 097/129] ASoC: soc-core: Stop using of_property_read_bool() for non-boolean properties commit 6eab7034579917f207ca6d8e3f4e11e85e0ab7d5 upstream. On R-Car: OF: /sound: Read of boolean property 'simple-audio-card,bitclock-master' with a value. OF: /sound: Read of boolean property 'simple-audio-card,frame-master' with a value. or: OF: /soc/sound@ec500000/ports/port@0/endpoint: Read of boolean property 'bitclock-master' with a value. OF: /soc/sound@ec500000/ports/port@0/endpoint: Read of boolean property 'frame-master' with a value. The use of of_property_read_bool() for non-boolean properties is deprecated in favor of of_property_present() when testing for property presence. Replace testing for presence before calling of_property_read_u32() by testing for an -EINVAL return value from the latter, to simplify the code. Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/db10e96fbda121e7456d70e97a013cbfc9755f4d.1737533954.git.geert+renesas@glider.be Signed-off-by: Mark Brown Cc: Christophe Leroy Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-core.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index b9dfc10a9fc0..7eea70eea68b 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2935,7 +2935,7 @@ int snd_soc_of_parse_pin_switches(struct snd_soc_card *card, const char *prop) unsigned int i, nb_controls; int ret; - if (!of_property_read_bool(dev->of_node, prop)) + if (!of_property_present(dev->of_node, prop)) return 0; strings = devm_kcalloc(dev, nb_controls_max, @@ -3009,23 +3009,17 @@ int snd_soc_of_parse_tdm_slot(struct device_node *np, if (rx_mask) snd_soc_of_get_slot_mask(np, "dai-tdm-slot-rx-mask", rx_mask); - if (of_property_read_bool(np, "dai-tdm-slot-num")) { - ret = of_property_read_u32(np, "dai-tdm-slot-num", &val); - if (ret) - return ret; + ret = of_property_read_u32(np, "dai-tdm-slot-num", &val); + if (ret && ret != -EINVAL) + return ret; + if (!ret && slots) + *slots = val; - if (slots) - *slots = val; - } - - if (of_property_read_bool(np, "dai-tdm-slot-width")) { - ret = of_property_read_u32(np, "dai-tdm-slot-width", &val); - if (ret) - return ret; - - if (slot_width) - *slot_width = val; - } + ret = of_property_read_u32(np, "dai-tdm-slot-width", &val); + if (ret && ret != -EINVAL) + return ret; + if (!ret && slot_width) + *slot_width = val; return 0; } @@ -3289,12 +3283,12 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, * check "[prefix]frame-master" */ snprintf(prop, sizeof(prop), "%sbitclock-master", prefix); - bit = of_property_read_bool(np, prop); + bit = of_property_present(np, prop); if (bit && bitclkmaster) *bitclkmaster = of_parse_phandle(np, prop, 0); snprintf(prop, sizeof(prop), "%sframe-master", prefix); - frame = of_property_read_bool(np, prop); + frame = of_property_present(np, prop); if (frame && framemaster) *framemaster = of_parse_phandle(np, prop, 0); From b9ded1fb298b29d758f9bb3f6532577fff89cdfe Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 27 Mar 2024 09:04:44 -0700 Subject: [PATCH 098/129] riscv: Pass patch_text() the length in bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 51781ce8f4486c3738a6c85175b599ad1be71f89 upstream. patch_text_nosync() already handles an arbitrary length of code, so this removes a superfluous loop and reduces the number of icache flushes. Reviewed-by: Björn Töpel Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240327160520.791322-6-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt [apply to v6.6] Signed-off-by: Nam Cao Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/patch.h | 2 +- arch/riscv/kernel/patch.c | 14 +++++--------- arch/riscv/kernel/probes/kprobes.c | 18 ++++++++++-------- arch/riscv/net/bpf_jit_comp64.c | 7 ++++--- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h index 9f5d6e14c405..7228e266b9a1 100644 --- a/arch/riscv/include/asm/patch.h +++ b/arch/riscv/include/asm/patch.h @@ -9,7 +9,7 @@ int patch_insn_write(void *addr, const void *insn, size_t len); int patch_text_nosync(void *addr, const void *insns, size_t len); int patch_text_set_nosync(void *addr, u8 c, size_t len); -int patch_text(void *addr, u32 *insns, int ninsns); +int patch_text(void *addr, u32 *insns, size_t len); extern int riscv_patch_in_stop_machine; diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 78387d843aa5..aeda87240dbc 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -19,7 +19,7 @@ struct patch_insn { void *addr; u32 *insns; - int ninsns; + size_t len; atomic_t cpu_count; }; @@ -234,14 +234,10 @@ NOKPROBE_SYMBOL(patch_text_nosync); static int patch_text_cb(void *data) { struct patch_insn *patch = data; - unsigned long len; - int i, ret = 0; + int ret = 0; if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { - for (i = 0; ret == 0 && i < patch->ninsns; i++) { - len = GET_INSN_LENGTH(patch->insns[i]); - ret = patch_insn_write(patch->addr + i * len, &patch->insns[i], len); - } + ret = patch_insn_write(patch->addr, patch->insns, patch->len); /* * Make sure the patching store is effective *before* we * increment the counter which releases all waiting CPUs @@ -262,13 +258,13 @@ static int patch_text_cb(void *data) } NOKPROBE_SYMBOL(patch_text_cb); -int patch_text(void *addr, u32 *insns, int ninsns) +int patch_text(void *addr, u32 *insns, size_t len) { int ret; struct patch_insn patch = { .addr = addr, .insns = insns, - .ninsns = ninsns, + .len = len, .cpu_count = ATOMIC_INIT(0), }; diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 4fbc70e823f0..297427ffc4e0 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -23,13 +23,13 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + size_t len = GET_INSN_LENGTH(p->opcode); u32 insn = __BUG_INSN_32; - unsigned long offset = GET_INSN_LENGTH(p->opcode); - p->ainsn.api.restore = (unsigned long)p->addr + offset; + p->ainsn.api.restore = (unsigned long)p->addr + len; - patch_text_nosync(p->ainsn.api.insn, &p->opcode, 1); - patch_text_nosync((void *)p->ainsn.api.insn + offset, &insn, 1); + patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) @@ -116,16 +116,18 @@ void *alloc_insn_page(void) /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ? - __BUG_INSN_32 : __BUG_INSN_16; + size_t len = GET_INSN_LENGTH(p->opcode); + u32 insn = len == 4 ? __BUG_INSN_32 : __BUG_INSN_16; - patch_text(p->addr, &insn, 1); + patch_text(p->addr, &insn, len); } /* remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, &p->opcode, 1); + size_t len = GET_INSN_LENGTH(p->opcode); + + patch_text(p->addr, &p->opcode, len); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 26eeb3973631..16eb4cd11cbd 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -14,6 +14,7 @@ #include "bpf_jit.h" #define RV_FENTRY_NINSNS 2 +#define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4) #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ @@ -681,7 +682,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, if (ret) return ret; - if (memcmp(ip, old_insns, RV_FENTRY_NINSNS * 4)) + if (memcmp(ip, old_insns, RV_FENTRY_NBYTES)) return -EFAULT; ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call); @@ -690,8 +691,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, cpus_read_lock(); mutex_lock(&text_mutex); - if (memcmp(ip, new_insns, RV_FENTRY_NINSNS * 4)) - ret = patch_text(ip, new_insns, RV_FENTRY_NINSNS); + if (memcmp(ip, new_insns, RV_FENTRY_NBYTES)) + ret = patch_text(ip, new_insns, RV_FENTRY_NBYTES); mutex_unlock(&text_mutex); cpus_read_unlock(); From bbbf5e0f87078b715e7a665d662a2c0e77f044ae Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:23 -0700 Subject: [PATCH 099/129] sch_htb: make htb_qlen_notify() idempotent commit 5ba8b837b522d7051ef81bacf3d95383ff8edce5 upstream. htb_qlen_notify() always deactivates the HTB class and in fact could trigger a warning if it is already deactivated. Therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-2-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_htb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 19035ef8387f..9a3f7ea80b34 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1485,6 +1485,8 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; + if (!cl->prio_activity) + return; htb_deactivate(qdisc_priv(sch), cl); } From 077e355dae44c77c46b3297c2f158b526384c022 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:24 -0700 Subject: [PATCH 100/129] sch_drr: make drr_qlen_notify() idempotent commit df008598b3a00be02a8051fde89ca0fbc416bd55 upstream. drr_qlen_notify() always deletes the DRR class from its active list with list_del(), therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-3-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_drr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index edadb3a7bd14..9b36955e32b1 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -110,6 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOBUFS; gnet_stats_basic_sync_init(&cl->bstats); + INIT_LIST_HEAD(&cl->alist); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -234,7 +235,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - list_del(&cl->alist); + list_del_init(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -393,7 +394,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) if (unlikely(skb == NULL)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); @@ -434,7 +435,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); qdisc_reset(cl->qdisc); } } From 9030a91235ae4845ec71902c3e0cecfc9ed1f2df Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:25 -0700 Subject: [PATCH 101/129] sch_hfsc: make hfsc_qlen_notify() idempotent commit 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb upstream. hfsc_qlen_notify() is not idempotent either and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life: 1. update_vf() decreases cl->cl_nactive, so we can check whether it is non-zero before calling it. 2. eltree_remove() always removes RB node cl->el_node, but we can use RB_EMPTY_NODE() + RB_CLEAR_NODE() to make it safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-4-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_hfsc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 4be8760b8bc3..5d9cccfac4a1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -203,7 +203,10 @@ eltree_insert(struct hfsc_class *cl) static inline void eltree_remove(struct hfsc_class *cl) { - rb_erase(&cl->el_node, &cl->sched->eligible); + if (!RB_EMPTY_NODE(&cl->el_node)) { + rb_erase(&cl->el_node, &cl->sched->eligible); + RB_CLEAR_NODE(&cl->el_node); + } } static inline void @@ -1224,7 +1227,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. */ - update_vf(cl, 0, 0); + if (cl->cl_nactive) + update_vf(cl, 0, 0); if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); } From 36269156033fe6262af0872f6ca539b8afb403c3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:26 -0700 Subject: [PATCH 102/129] sch_qfq: make qfq_qlen_notify() idempotent commit 55f9eca4bfe30a15d8656f915922e8c98b7f0728 upstream. qfq_qlen_notify() always deletes its class from its active list with list_del_init() _and_ calls qfq_deactivate_agg() when the whole list becomes empty. To make it idempotent, just skip everything when it is not in the active list. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-5-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index c7d4ee8cedff..5e557b960bde 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -352,7 +352,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) struct qfq_aggregate *agg = cl->agg; - list_del(&cl->alist); /* remove from RR queue of the aggregate */ + list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } @@ -482,6 +482,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; + INIT_LIST_HEAD(&cl->alist); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); @@ -990,7 +991,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ - list_del(&cl->alist); + list_del_init(&cl->alist); else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); @@ -1421,6 +1422,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; + if (list_empty(&cl->alist)) + return; qfq_deactivate_class(q, cl); } From e762ec4c7b3401c4cef023b2b51f9c17326cef91 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:27 -0700 Subject: [PATCH 103/129] sch_ets: make est_qlen_notify() idempotent commit a7a15f39c682ac4268624da2abdb9114bdde96d5 upstream. est_qlen_notify() deletes its class from its active list with list_del() when qlen is 0, therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250403211033.166059-6-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_ets.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 18269f86d21e..9da86db4d2c2 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -298,7 +298,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg) * to remove them. */ if (!ets_class_is_strict(q, cl) && sch->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); } static int ets_class_dump(struct Qdisc *sch, unsigned long arg, @@ -491,7 +491,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch) if (unlikely(!skb)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); return ets_qdisc_dequeue_skb(sch, skb); } @@ -660,7 +660,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del(&q->classes[i].alist); + list_del_init(&q->classes[i].alist); qdisc_tree_flush_backlog(q->classes[i].qdisc); } q->nstrict = nstrict; @@ -716,7 +716,7 @@ static void ets_qdisc_reset(struct Qdisc *sch) for (band = q->nstrict; band < q->nbands; band++) { if (q->classes[band].qdisc->q.qlen) - list_del(&q->classes[band].alist); + list_del_init(&q->classes[band].alist); } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); From 2fbf6c9695ad9f05e7e5c166bf43fac7cb3276b3 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 6 Mar 2025 18:54:47 +0000 Subject: [PATCH 104/129] firmware: arm_scmi: Balance device refcount when destroying devices [ Upstream commit 9ca67840c0ddf3f39407339624cef824a4f27599 ] Using device_find_child() to lookup the proper SCMI device to destroy causes an unbalance in device refcount, since device_find_child() calls an implicit get_device(): this, in turns, inhibits the call of the provided release methods upon devices destruction. As a consequence, one of the structures that is not freed properly upon destruction is the internal struct device_private dev->p populated by the drivers subsystem core. KMemleak detects this situation since loading/unloding some SCMI driver causes related devices to be created/destroyed without calling any device_release method. unreferenced object 0xffff00000f583800 (size 512): comm "insmod", pid 227, jiffies 4294912190 hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 60 36 1d 8a 00 80 ff ff ........`6...... backtrace (crc 114e2eed): kmemleak_alloc+0xbc/0xd8 __kmalloc_cache_noprof+0x2dc/0x398 device_add+0x954/0x12d0 device_register+0x28/0x40 __scmi_device_create.part.0+0x1bc/0x380 scmi_device_create+0x2d0/0x390 scmi_create_protocol_devices+0x74/0xf8 scmi_device_request_notifier+0x1f8/0x2a8 notifier_call_chain+0x110/0x3b0 blocking_notifier_call_chain+0x70/0xb0 scmi_driver_register+0x350/0x7f0 0xffff80000a3b3038 do_one_initcall+0x12c/0x730 do_init_module+0x1dc/0x640 load_module+0x4b20/0x5b70 init_module_from_file+0xec/0x158 $ ./scripts/faddr2line ./vmlinux device_add+0x954/0x12d0 device_add+0x954/0x12d0: kmalloc_noprof at include/linux/slab.h:901 (inlined by) kzalloc_noprof at include/linux/slab.h:1037 (inlined by) device_private_init at drivers/base/core.c:3510 (inlined by) device_add at drivers/base/core.c:3561 Balance device refcount by issuing a put_device() on devices found via device_find_child(). Reported-by: Alice Ryhl Closes: https://lore.kernel.org/linux-arm-kernel/Z8nK3uFkspy61yjP@arm.com/T/#mc1f73a0ea5e41014fa145147b7b839fc988ada8f CC: Sudeep Holla CC: Catalin Marinas Fixes: d4f9dddd21f3 ("firmware: arm_scmi: Add dynamic scmi devices creation") Signed-off-by: Cristian Marussi Tested-by: Alice Ryhl Message-Id: <20250306185447.2039336-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index dcf774d3edfe..51eeaf14367d 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -240,6 +240,9 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } From f68f93c9d04378f2c12b7b8f9ec1eb75992ef80a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Mar 2025 11:57:00 +0000 Subject: [PATCH 105/129] firmware: arm_ffa: Skip Rx buffer ownership release if not acquired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4567bdaaaaa1744da3d7da07d9aca2f941f5b4e5 ] Completion of the FFA_PARTITION_INFO_GET ABI transfers the ownership of the caller’s Rx buffer from the producer(typically partition mnager) to the consumer(this driver/OS). FFA_RX_RELEASE transfers the ownership from the consumer back to the producer. However, when we set the flag to just return the count of partitions deployed in the system corresponding to the specified UUID while invoking FFA_PARTITION_INFO_GET, the Rx buffer ownership shouldn't be transferred to this driver. We must be able to skip transferring back the ownership to the partition manager when we request just to get the count of the partitions as the buffers are not acquired in this case. Firmware may return FFA_RET_DENIED or other error for the ffa_rx_release() in such cases. Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support") Message-Id: <20250321115700.3525197-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_ffa/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 7cd6b1564e80..7c2db3f01765 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -225,7 +225,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, memcpy(buffer + idx, drv_info->rx_buffer + idx * sz, buf_sz); - ffa_rx_release(); + if (!(flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)) + ffa_rx_release(); mutex_unlock(&drv_info->rx_lock); From fabe730323838e2b27f45be4c9cd982d72c781bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 14 Mar 2025 17:20:38 +0100 Subject: [PATCH 106/129] ARM: dts: opos6ul: add ksz8081 phy properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6e1a7bc8382b0d4208258f7d2a4474fae788dd90 ] Commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") removed a PHY fixup that setted the clock mode and the LED mode. Make the Ethernet interface work again by doing as advised in the commit's log, set clock mode and the LED mode in the device tree. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Sébastien Szymanski Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2..dda4fa91b2f2 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; From ce2fa5bc0158bbfe54878261186e76a89795d0f7 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:58 +0200 Subject: [PATCH 107/129] arm64: dts: st: Adjust interrupt-controller for stm32mp25 SoCs [ Upstream commit de2b2107d5a41a91ab603e135fb6e408abbee28e ] Use gic-400 compatible and remove address-cells = <1> on aarch64 Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-2-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index 5268a4321841..3219a8ea1e6a 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -73,9 +73,8 @@ }; intc: interrupt-controller@4ac00000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, <0x0 0x4ac20000 0x0 0x2000>, From f129d8aea8b17227e95610e86acc1c9c489f7da7 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:59 +0200 Subject: [PATCH 108/129] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp25 SoCs [ Upstream commit 06c231fe953a26f4bc9d7a37ba1b9b288a59c7c2 ] Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20250415111654.2103767-3-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index 3219a8ea1e6a..ce5409acae1c 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -77,9 +77,9 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, - <0x0 0x4ac20000 0x0 0x2000>, - <0x0 0x4ac40000 0x0 0x2000>, - <0x0 0x4ac60000 0x0 0x2000>; + <0x0 0x4ac20000 0x0 0x20000>, + <0x0 0x4ac40000 0x0 0x20000>, + <0x0 0x4ac60000 0x0 0x20000>; }; psci { From 5e8c311318faec5809dbe35a0cb4476207fffe28 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Mon, 21 Apr 2025 22:12:59 +0200 Subject: [PATCH 109/129] Revert "drm/meson: vclk: fix calculation of 59.94 fractional rates" [ Upstream commit f37bb5486ea536c1d61df89feeaeff3f84f0b560 ] This reverts commit bfbc68e. The patch does permit the offending YUV420 @ 59.94 phy_freq and vclk_freq mode to match in calculations. It also results in all fractional rates being unavailable for use. This was unintended and requires the patch to be reverted. Fixes: bfbc68e4d869 ("drm/meson: vclk: fix calculation of 59.94 fractional rates") Cc: stable@vger.kernel.org Signed-off-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc..2a82119eb58e 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,13 +790,13 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, FREQ_1000_1001(params[i].pixel_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); + FREQ_1000_1001(params[i].phy_freq/10)*10); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && + if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -1070,7 +1070,7 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && + phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && (vclk_freq == params[freq].vclk_freq || vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) From 6299fb6190fefba4afdf24786d60b5fdbaa74840 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Oct 2023 13:29:09 +0300 Subject: [PATCH 110/129] xhci: Set DESI bits in ERDP register correctly [ Upstream commit 044818a6cd808b38a5d179a5fb9940417de4ba24 ] When using more than one Event Ring segment (ERSTSZ > 1), software shall set the DESI bits in the ERDP register to the number of the segment to which the upper ERDP bits are pointing. The xHC may use the DESI bits as a shortcut to determine whether it needs to check for an Event Ring Full condition: If it's enqueueing events in a different segment, it need not compare its internal Enqueue Pointer with the Dequeue Pointer in the upper bits of the ERDP register (sec 5.5.2.3.3). Not setting the DESI bits correctly can result in the xHC enqueueing events past the Dequeue Pointer. On Renesas uPD720201 host controllers, incorrect DESI bits cause an interrupt storm. For comparison, VIA VL805 host controllers do not exhibit such problems. Perhaps they do not take advantage of the optimization afforded by the DESI bits. To fix the issue, assign the segment number to each struct xhci_segment in xhci_segment_alloc(). When advancing the Dequeue Pointer in xhci_update_erst_dequeue(), write the segment number to the DESI bits. On driver probe, set the DESI bits to zero in xhci_set_hc_event_deq() as processing starts in segment 0. Likewise on driver teardown, clear the DESI bits to zero in xhci_free_interrupter() when clearing the upper bits of the ERDP register. Previously those functions (incorrectly) treated the DESI bits as if they're declared RsvdP. Signed-off-by: Lukas Wunner Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20231019102924.2797346-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 25 +++++++++++-------------- drivers/usb/host/xhci-ring.c | 2 +- drivers/usb/host/xhci.h | 1 + 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index fbc486546b85..f236fba5cd24 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -29,6 +29,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci, unsigned int cycle_state, unsigned int max_packet, + unsigned int num, gfp_t flags) { struct xhci_segment *seg; @@ -60,6 +61,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci, for (i = 0; i < TRBS_PER_SEGMENT; i++) seg->trbs[i].link.control = cpu_to_le32(TRB_CYCLE); } + seg->num = num; seg->dma = dma; seg->next = NULL; @@ -324,6 +326,7 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, enum xhci_ring_type type, unsigned int max_packet, gfp_t flags) { struct xhci_segment *prev; + unsigned int num = 0; bool chain_links; /* Set chain bit for 0.95 hosts, and for isoc rings on AMD 0.96 host */ @@ -331,16 +334,17 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, (type == TYPE_ISOC && (xhci->quirks & XHCI_AMD_0x96_HOST))); - prev = xhci_segment_alloc(xhci, cycle_state, max_packet, flags); + prev = xhci_segment_alloc(xhci, cycle_state, max_packet, num, flags); if (!prev) return -ENOMEM; - num_segs--; + num++; *first = prev; - while (num_segs > 0) { + while (num < num_segs) { struct xhci_segment *next; - next = xhci_segment_alloc(xhci, cycle_state, max_packet, flags); + next = xhci_segment_alloc(xhci, cycle_state, max_packet, num, + flags); if (!next) { prev = *first; while (prev) { @@ -353,7 +357,7 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, xhci_link_segments(prev, next, type, chain_links); prev = next; - num_segs--; + num++; } xhci_link_segments(prev, *first, type, chain_links); *last = prev; @@ -1803,7 +1807,6 @@ xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) { struct device *dev = xhci_to_hcd(xhci)->self.sysdev; size_t erst_size; - u64 tmp64; u32 tmp; if (!ir) @@ -1826,9 +1829,7 @@ xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) tmp &= ERST_SIZE_MASK; writel(tmp, &ir->ir_set->erst_size); - tmp64 = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); - tmp64 &= (u64) ERST_PTR_MASK; - xhci_write_64(xhci, tmp64, &ir->ir_set->erst_dequeue); + xhci_write_64(xhci, ERST_EHB, &ir->ir_set->erst_dequeue); } /* free interrrupter event ring */ @@ -1935,7 +1936,6 @@ no_bw: static void xhci_set_hc_event_deq(struct xhci_hcd *xhci, struct xhci_interrupter *ir) { - u64 temp; dma_addr_t deq; deq = xhci_trb_virt_to_dma(ir->event_ring->deq_seg, @@ -1943,15 +1943,12 @@ static void xhci_set_hc_event_deq(struct xhci_hcd *xhci, struct xhci_interrupter if (!deq) xhci_warn(xhci, "WARN something wrong with SW event ring dequeue ptr.\n"); /* Update HC event ring dequeue pointer */ - temp = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); - temp &= ERST_PTR_MASK; /* Don't clear the EHB bit (which is RW1C) because * there might be more events to service. */ - temp &= ~ERST_EHB; xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Write event ring dequeue pointer, preserving EHB bit"); - xhci_write_64(xhci, ((u64) deq & (u64) ~ERST_PTR_MASK) | temp, + xhci_write_64(xhci, ((u64) deq & (u64) ~ERST_PTR_MASK), &ir->ir_set->erst_dequeue); } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index cb9443962945..884a668cca36 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3167,7 +3167,7 @@ static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, return; /* Update HC event ring dequeue pointer */ - temp_64 &= ERST_DESI_MASK; + temp_64 = ir->event_ring->deq_seg->num & ERST_DESI_MASK; temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index df87e8bcb7d2..0325fccfaa2a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1293,6 +1293,7 @@ struct xhci_segment { union xhci_trb *trbs; /* private to HCD */ struct xhci_segment *next; + unsigned int num; dma_addr_t dma; /* Max packet sized bounce buffer for td-fragmant alignment */ dma_addr_t bounce_dma; From f377434945f528217e65fc5b18781caeba19ce72 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Thu, 19 Oct 2023 13:29:10 +0300 Subject: [PATCH 111/129] xhci: Use more than one Event Ring segment [ Upstream commit 28084d3fcc3c8445542917f32e382c45b5343cc2 ] Users have reported log spam created by "Event Ring Full" xHC event TRBs. These are caused by interrupt latency in conjunction with a very busy set of devices on the bus. The errors are benign, but throughput will suffer as the xHC will pause processing of transfers until the Event Ring is drained by the kernel. Commit dc0ffbea5729 ("usb: host: xhci: update event ring dequeue pointer on purpose") mitigated the issue by advancing the Event Ring Dequeue Pointer already after half a segment has been processed. Nevertheless, providing a larger Event Ring would be useful to cope with load peaks. Expand the number of event TRB slots available by increasing the number of Event Ring segments in the ERST. Controllers have a hardware-defined limit as to the number of ERST entries they can process, but with up to 32k it can be excessively high (sec 5.3.4). So cap the actual number at 2 (configurable through the ERST_MAX_SEGS macro), which seems like a reasonable quantity. It is supported by any xHC because the limit in the HCSPARAMS2 register is defined as a power of 2. Renesas uPD720201 and VIA VL805 controllers do not support more than 2 ERST entries. An alternative to increasing the number of Event Ring segments would be an increase of the segment size. But that requires allocating multiple contiguous pages, which may be impossible if memory is fragmented. Signed-off-by: Jonathan Bell Signed-off-by: Lukas Wunner Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20231019102924.2797346-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 10 +++++++--- drivers/usb/host/xhci.h | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f236fba5cd24..45240299fa17 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2237,14 +2237,18 @@ xhci_alloc_interrupter(struct xhci_hcd *xhci, gfp_t flags) { struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_interrupter *ir; + unsigned int num_segs; int ret; ir = kzalloc_node(sizeof(*ir), flags, dev_to_node(dev)); if (!ir) return NULL; - ir->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1, TYPE_EVENT, - 0, flags); + num_segs = min_t(unsigned int, 1 << HCS_ERST_MAX(xhci->hcs_params2), + ERST_MAX_SEGS); + + ir->event_ring = xhci_ring_alloc(xhci, num_segs, 1, TYPE_EVENT, 0, + flags); if (!ir->event_ring) { xhci_warn(xhci, "Failed to allocate interrupter event ring\n"); kfree(ir); @@ -2280,7 +2284,7 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, /* set ERST count with the number of entries in the segment table */ erst_size = readl(&ir->ir_set->erst_size); erst_size &= ERST_SIZE_MASK; - erst_size |= ERST_NUM_SEGS; + erst_size |= ir->event_ring->num_segs; writel(erst_size, &ir->ir_set->erst_size); erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 0325fccfaa2a..76a3010b8b74 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1427,8 +1427,9 @@ struct urb_priv { * Each segment table entry is 4*32bits long. 1K seems like an ok size: * (1K bytes * 8bytes/bit) / (4*32 bits) = 64 segment entries in the table, * meaning 64 ring segments. - * Initial allocated size of the ERST, in number of entries */ -#define ERST_NUM_SEGS 1 + * Reasonable limit for number of Event Ring segments (spec allows 32k) + */ +#define ERST_MAX_SEGS 2 /* Poll every 60 seconds */ #define POLL_TIMEOUT 60 /* Stop endpoint command timeout (secs) for URB cancellation watchdog timer */ From 3f6e8ad65b1eeaeb26e36844e45a3b9ac2124a1d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 19 Oct 2023 13:29:15 +0300 Subject: [PATCH 112/129] xhci: Clean up stale comment on ERST_SIZE macro [ Upstream commit c087fada0a6180ab5b88b11c1776eef02f8d556f ] Commit ebd88cf50729 ("xhci: Remove unused defines for ERST_SIZE and ERST_ENTRIES") removed the ERST_SIZE macro but retained a code comment explaining the quantity chosen in the macro. Remove the code comment as well. Signed-off-by: Lukas Wunner Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20231019102924.2797346-11-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 76a3010b8b74..a49560145d78 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1423,12 +1423,7 @@ struct urb_priv { struct xhci_td td[]; }; -/* - * Each segment table entry is 4*32bits long. 1K seems like an ok size: - * (1K bytes * 8bytes/bit) / (4*32 bits) = 64 segment entries in the table, - * meaning 64 ring segments. - * Reasonable limit for number of Event Ring segments (spec allows 32k) - */ +/* Reasonable limit for number of Event Ring segments (spec allows 32k) */ #define ERST_MAX_SEGS 2 /* Poll every 60 seconds */ #define POLL_TIMEOUT 60 From ff0a51339a3b74c8e23b6bc32c58b41340345b43 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 19 Oct 2023 13:29:21 +0300 Subject: [PATCH 113/129] xhci: split free interrupter into separate remove and free parts [ Upstream commit 47f503cf5f799ec02e5f4b7c3b9afe145eca2aef ] The current function that both removes and frees an interrupter isn't optimal when using several interrupters. The array of interrupters need to be protected with a lock while removing interrupters, but the default xhci spin lock can't be used while freeing the interrupters event ring segment table as dma_free_coherent() should be called with IRQs enabled. There is no need to free the interrupter under the lock, so split this code into separate unlocked free part, and a lock protected remove part. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20231019102924.2797346-17-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 45240299fa17..f00e96c9ca57 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1803,22 +1803,13 @@ int xhci_alloc_erst(struct xhci_hcd *xhci, } static void -xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) +xhci_remove_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) { - struct device *dev = xhci_to_hcd(xhci)->self.sysdev; - size_t erst_size; u32 tmp; if (!ir) return; - erst_size = sizeof(struct xhci_erst_entry) * ir->erst.num_entries; - if (ir->erst.entries) - dma_free_coherent(dev, erst_size, - ir->erst.entries, - ir->erst.erst_dma_addr); - ir->erst.entries = NULL; - /* * Clean out interrupter registers except ERSTBA. Clearing either the * low or high 32 bits of ERSTBA immediately causes the controller to @@ -1831,10 +1822,28 @@ xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) xhci_write_64(xhci, ERST_EHB, &ir->ir_set->erst_dequeue); } +} - /* free interrrupter event ring */ +static void +xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) +{ + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + size_t erst_size; + + if (!ir) + return; + + erst_size = sizeof(struct xhci_erst_entry) * ir->erst.num_entries; + if (ir->erst.entries) + dma_free_coherent(dev, erst_size, + ir->erst.entries, + ir->erst.erst_dma_addr); + ir->erst.entries = NULL; + + /* free interrupter event ring */ if (ir->event_ring) xhci_ring_free(xhci, ir->event_ring); + ir->event_ring = NULL; kfree(ir); @@ -1847,6 +1856,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); + xhci_remove_interrupter(xhci, xhci->interrupter); xhci_free_interrupter(xhci, xhci->interrupter); xhci->interrupter = NULL; xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed primary event ring"); From bcd191d7bab25513daf7db78ab32eda60d9484c3 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 2 Jan 2024 13:45:09 -0800 Subject: [PATCH 114/129] xhci: add support to allocate several interrupters [ Upstream commit c99b38c412343053e9af187e595793c8805bb9b8 ] Modify the XHCI drivers to accommodate for handling multiple event rings in case there are multiple interrupters. Add the required APIs so clients are able to allocate/request for an interrupter ring, and pass this information back to the client driver. This allows for users to handle the resource accordingly, such as passing the event ring base address to an audio DSP. There is no actual support for multiple MSI/MSI-X vectors. [export xhci_initialize_ring_info() -wcheng] Signed-off-by: Mathias Nyman Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20240102214549.22498-2-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-debugfs.c | 2 +- drivers/usb/host/xhci-mem.c | 108 ++++++++++++++++++++++++++++---- drivers/usb/host/xhci-ring.c | 2 +- drivers/usb/host/xhci.c | 51 +++++++++------ drivers/usb/host/xhci.h | 6 +- 5 files changed, 137 insertions(+), 32 deletions(-) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 99baa60ef50f..15a8402ee8a1 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -693,7 +693,7 @@ void xhci_debugfs_init(struct xhci_hcd *xhci) "command-ring", xhci->debugfs_root); - xhci_debugfs_create_ring_dir(xhci, &xhci->interrupter->event_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->interrupters[0]->event_ring, "event-ring", xhci->debugfs_root); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f00e96c9ca57..3ab547a6e4ce 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -318,6 +318,7 @@ void xhci_initialize_ring_info(struct xhci_ring *ring, */ ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1; } +EXPORT_SYMBOL_GPL(xhci_initialize_ring_info); /* Allocate segments and link them for a ring */ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, @@ -1849,6 +1850,31 @@ xhci_free_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) kfree(ir); } +void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrupter *ir) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + unsigned int intr_num; + + /* interrupter 0 is primary interrupter, don't touch it */ + if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) + xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n"); + + /* fixme, should we check xhci->interrupter[intr_num] == ir */ + /* fixme locking */ + + spin_lock_irq(&xhci->lock); + + intr_num = ir->intr_num; + + xhci_remove_interrupter(xhci, ir); + xhci->interrupters[intr_num] = NULL; + + spin_unlock_irq(&xhci->lock); + + xhci_free_interrupter(xhci, ir); +} +EXPORT_SYMBOL_GPL(xhci_remove_secondary_interrupter); + void xhci_mem_cleanup(struct xhci_hcd *xhci) { struct device *dev = xhci_to_hcd(xhci)->self.sysdev; @@ -1856,10 +1882,14 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - xhci_remove_interrupter(xhci, xhci->interrupter); - xhci_free_interrupter(xhci, xhci->interrupter); - xhci->interrupter = NULL; - xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed primary event ring"); + for (i = 0; i < xhci->max_interrupters; i++) { + if (xhci->interrupters[i]) { + xhci_remove_interrupter(xhci, xhci->interrupters[i]); + xhci_free_interrupter(xhci, xhci->interrupters[i]); + xhci->interrupters[i] = NULL; + } + } + xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed interrupters"); if (xhci->cmd_ring) xhci_ring_free(xhci, xhci->cmd_ring); @@ -1929,6 +1959,7 @@ no_bw: for (i = 0; i < xhci->num_port_caps; i++) kfree(xhci->port_caps[i].psi); kfree(xhci->port_caps); + kfree(xhci->interrupters); xhci->num_port_caps = 0; xhci->usb2_rhub.ports = NULL; @@ -1937,6 +1968,7 @@ no_bw: xhci->rh_bw = NULL; xhci->ext_caps = NULL; xhci->port_caps = NULL; + xhci->interrupters = NULL; xhci->page_size = 0; xhci->page_shift = 0; @@ -2243,18 +2275,20 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) } static struct xhci_interrupter * -xhci_alloc_interrupter(struct xhci_hcd *xhci, gfp_t flags) +xhci_alloc_interrupter(struct xhci_hcd *xhci, int segs, gfp_t flags) { struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_interrupter *ir; - unsigned int num_segs; + unsigned int num_segs = segs; int ret; ir = kzalloc_node(sizeof(*ir), flags, dev_to_node(dev)); if (!ir) return NULL; - num_segs = min_t(unsigned int, 1 << HCS_ERST_MAX(xhci->hcs_params2), + /* number of ring segments should be greater than 0 */ + if (segs <= 0) + num_segs = min_t(unsigned int, 1 << HCS_ERST_MAX(xhci->hcs_params2), ERST_MAX_SEGS); ir->event_ring = xhci_ring_alloc(xhci, num_segs, 1, TYPE_EVENT, 0, @@ -2289,6 +2323,13 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, return -EINVAL; } + if (xhci->interrupters[intr_num]) { + xhci_warn(xhci, "Interrupter %d\n already set up", intr_num); + return -EINVAL; + } + + xhci->interrupters[intr_num] = ir; + ir->intr_num = intr_num; ir->ir_set = &xhci->run_regs->ir_set[intr_num]; /* set ERST count with the number of entries in the segment table */ @@ -2311,10 +2352,52 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, return 0; } +struct xhci_interrupter * +xhci_create_secondary_interrupter(struct usb_hcd *hcd, int num_seg) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_interrupter *ir; + unsigned int i; + int err = -ENOSPC; + + if (!xhci->interrupters || xhci->max_interrupters <= 1) + return NULL; + + ir = xhci_alloc_interrupter(xhci, num_seg, GFP_KERNEL); + if (!ir) + return NULL; + + spin_lock_irq(&xhci->lock); + + /* Find available secondary interrupter, interrupter 0 is reserved for primary */ + for (i = 1; i < xhci->max_interrupters; i++) { + if (xhci->interrupters[i] == NULL) { + err = xhci_add_interrupter(xhci, ir, i); + break; + } + } + + spin_unlock_irq(&xhci->lock); + + if (err) { + xhci_warn(xhci, "Failed to add secondary interrupter, max interrupters %d\n", + xhci->max_interrupters); + xhci_free_interrupter(xhci, ir); + return NULL; + } + + xhci_dbg(xhci, "Add secondary interrupter %d, max interrupters %d\n", + i, xhci->max_interrupters); + + return ir; +} +EXPORT_SYMBOL_GPL(xhci_create_secondary_interrupter); + int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) { - dma_addr_t dma; + struct xhci_interrupter *ir; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + dma_addr_t dma; unsigned int val, val2; u64 val_64; u32 page_size, temp; @@ -2439,11 +2522,14 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) /* Allocate and set up primary interrupter 0 with an event ring. */ xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Allocating primary event ring"); - xhci->interrupter = xhci_alloc_interrupter(xhci, flags); - if (!xhci->interrupter) + xhci->interrupters = kcalloc_node(xhci->max_interrupters, sizeof(*xhci->interrupters), + flags, dev_to_node(dev)); + + ir = xhci_alloc_interrupter(xhci, 0, flags); + if (!ir) goto fail; - if (xhci_add_interrupter(xhci, xhci->interrupter, 0)) + if (xhci_add_interrupter(xhci, ir, 0)) goto fail; xhci->isoc_bei_interval = AVOID_BEI_INTERVAL_MAX; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 884a668cca36..5a53280fa2ed 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3225,7 +3225,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) writel(status, &xhci->op_regs->status); /* This is the handler of the primary interrupter */ - ir = xhci->interrupter; + ir = xhci->interrupters[0]; if (!hcd->msi_enabled) { u32 irq_pending; irq_pending = readl(&ir->ir_set->irq_pending); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 70e6c240a540..5c3250989047 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -457,7 +457,7 @@ static int xhci_init(struct usb_hcd *hcd) static int xhci_run_finished(struct xhci_hcd *xhci) { - struct xhci_interrupter *ir = xhci->interrupter; + struct xhci_interrupter *ir = xhci->interrupters[0]; unsigned long flags; u32 temp; @@ -509,7 +509,7 @@ int xhci_run(struct usb_hcd *hcd) u64 temp_64; int ret; struct xhci_hcd *xhci = hcd_to_xhci(hcd); - struct xhci_interrupter *ir = xhci->interrupter; + struct xhci_interrupter *ir = xhci->interrupters[0]; /* Start the xHCI host controller running only after the USB 2.0 roothub * is setup. */ @@ -573,7 +573,7 @@ void xhci_stop(struct usb_hcd *hcd) { u32 temp; struct xhci_hcd *xhci = hcd_to_xhci(hcd); - struct xhci_interrupter *ir = xhci->interrupter; + struct xhci_interrupter *ir = xhci->interrupters[0]; mutex_lock(&xhci->mutex); @@ -669,36 +669,51 @@ EXPORT_SYMBOL_GPL(xhci_shutdown); #ifdef CONFIG_PM static void xhci_save_registers(struct xhci_hcd *xhci) { - struct xhci_interrupter *ir = xhci->interrupter; + struct xhci_interrupter *ir; + unsigned int i; xhci->s3.command = readl(&xhci->op_regs->command); xhci->s3.dev_nt = readl(&xhci->op_regs->dev_notification); xhci->s3.dcbaa_ptr = xhci_read_64(xhci, &xhci->op_regs->dcbaa_ptr); xhci->s3.config_reg = readl(&xhci->op_regs->config_reg); - if (!ir) - return; + /* save both primary and all secondary interrupters */ + /* fixme, shold we lock to prevent race with remove secondary interrupter? */ + for (i = 0; i < xhci->max_interrupters; i++) { + ir = xhci->interrupters[i]; + if (!ir) + continue; - ir->s3_erst_size = readl(&ir->ir_set->erst_size); - ir->s3_erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); - ir->s3_erst_dequeue = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); - ir->s3_irq_pending = readl(&ir->ir_set->irq_pending); - ir->s3_irq_control = readl(&ir->ir_set->irq_control); + ir->s3_erst_size = readl(&ir->ir_set->erst_size); + ir->s3_erst_base = xhci_read_64(xhci, &ir->ir_set->erst_base); + ir->s3_erst_dequeue = xhci_read_64(xhci, &ir->ir_set->erst_dequeue); + ir->s3_irq_pending = readl(&ir->ir_set->irq_pending); + ir->s3_irq_control = readl(&ir->ir_set->irq_control); + } } static void xhci_restore_registers(struct xhci_hcd *xhci) { - struct xhci_interrupter *ir = xhci->interrupter; + struct xhci_interrupter *ir; + unsigned int i; writel(xhci->s3.command, &xhci->op_regs->command); writel(xhci->s3.dev_nt, &xhci->op_regs->dev_notification); xhci_write_64(xhci, xhci->s3.dcbaa_ptr, &xhci->op_regs->dcbaa_ptr); writel(xhci->s3.config_reg, &xhci->op_regs->config_reg); - writel(ir->s3_erst_size, &ir->ir_set->erst_size); - xhci_write_64(xhci, ir->s3_erst_base, &ir->ir_set->erst_base); - xhci_write_64(xhci, ir->s3_erst_dequeue, &ir->ir_set->erst_dequeue); - writel(ir->s3_irq_pending, &ir->ir_set->irq_pending); - writel(ir->s3_irq_control, &ir->ir_set->irq_control); + + /* FIXME should we lock to protect against freeing of interrupters */ + for (i = 0; i < xhci->max_interrupters; i++) { + ir = xhci->interrupters[i]; + if (!ir) + continue; + + writel(ir->s3_erst_size, &ir->ir_set->erst_size); + xhci_write_64(xhci, ir->s3_erst_base, &ir->ir_set->erst_base); + xhci_write_64(xhci, ir->s3_erst_dequeue, &ir->ir_set->erst_dequeue); + writel(ir->s3_irq_pending, &ir->ir_set->irq_pending); + writel(ir->s3_irq_control, &ir->ir_set->irq_control); + } } static void xhci_set_cmd_ring_deq(struct xhci_hcd *xhci) @@ -1061,7 +1076,7 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) xhci_dbg(xhci, "// Disabling event ring interrupts\n"); temp = readl(&xhci->op_regs->status); writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); - xhci_disable_interrupter(xhci->interrupter); + xhci_disable_interrupter(xhci->interrupters[0]); xhci_dbg(xhci, "cleaning up memory\n"); xhci_mem_cleanup(xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index a49560145d78..9d2cf11cef84 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1549,7 +1549,7 @@ struct xhci_hcd { struct reset_control *reset; /* data structures */ struct xhci_device_context_array *dcbaa; - struct xhci_interrupter *interrupter; + struct xhci_interrupter **interrupters; struct xhci_ring *cmd_ring; unsigned int cmd_ring_state; #define CMD_RING_STATE_RUNNING (1 << 0) @@ -1866,6 +1866,10 @@ struct xhci_container_ctx *xhci_alloc_container_ctx(struct xhci_hcd *xhci, int type, gfp_t flags); void xhci_free_container_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx); +struct xhci_interrupter * +xhci_create_secondary_interrupter(struct usb_hcd *hcd, int num_seg); +void xhci_remove_secondary_interrupter(struct usb_hcd + *hcd, struct xhci_interrupter *ir); /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); From 3733697cb7dd1ada4181affd1d170bdc47888bba Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 16 Feb 2024 16:09:28 -0800 Subject: [PATCH 115/129] xhci: Add helper to set an interrupters interrupt moderation interval [ Upstream commit ace21625878f78708b75b7a872ec7a0e2ed15ca4 ] Add a helper to set the interrupt moderation interval for an interrupter. Each interrupter can have its own moderation value. Hardware has a 16bit register for the moderation value, each step is 250ns. Helper function imod_interval argument is in nanoseconds. Values from 0 to 16383750 (250 x 0xffff) are accepted. 0 means no interrupt throttling. Signed-off-by: Mathias Nyman Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20240217001017.29969-3-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 5c3250989047..d6a0c79e5fad 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -323,6 +323,23 @@ static int xhci_disable_interrupter(struct xhci_interrupter *ir) return 0; } +/* interrupt moderation interval imod_interval in nanoseconds */ +static int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, + u32 imod_interval) +{ + u32 imod; + + if (!ir || !ir->ir_set || imod_interval > U16_MAX * 250) + return -EINVAL; + + imod = readl(&ir->ir_set->irq_control); + imod &= ~ER_IRQ_INTERVAL_MASK; + imod |= (imod_interval / 250) & ER_IRQ_INTERVAL_MASK; + writel(imod, &ir->ir_set->irq_control); + + return 0; +} + static void compliance_mode_recovery(struct timer_list *t) { struct xhci_hcd *xhci; @@ -505,7 +522,6 @@ static int xhci_run_finished(struct xhci_hcd *xhci) */ int xhci_run(struct usb_hcd *hcd) { - u32 temp; u64 temp_64; int ret; struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -525,12 +541,7 @@ int xhci_run(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "ERST deq = 64'h%0lx", (long unsigned int) temp_64); - xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Set the interrupt modulation register"); - temp = readl(&ir->ir_set->irq_control); - temp &= ~ER_IRQ_INTERVAL_MASK; - temp |= (xhci->imod_interval / 250) & ER_IRQ_INTERVAL_MASK; - writel(temp, &ir->ir_set->irq_control); + xhci_set_interrupter_moderation(ir, xhci->imod_interval); if (xhci->quirks & XHCI_NEC_HOST) { struct xhci_command *command; From cc64775afb2775a9e8f4289909eff7940283110e Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Mon, 29 Apr 2024 17:02:30 +0300 Subject: [PATCH 116/129] usb: xhci: check if 'requested segments' exceeds ERST capacity [ Upstream commit db4460b6ecf07574d580f01cd88054a62607068c ] Check if requested segments ('segs' or 'ERST_DEFAULT_SEGS') exceeds the maximum amount ERST supports. When 'segs' is '0', 'ERST_DEFAULT_SEGS' is used instead. But both values may not exceed ERST max. Macro 'ERST_MAX_SEGS' is renamed to 'ERST_DEFAULT_SEGS'. The new name better represents the macros, which is the number of Event Ring segments to allocate, when the amount is not specified. Additionally, rename and change xhci_create_secondary_interrupter()'s argument 'int num_segs' to 'unsigned int segs'. This makes it the same as its counter part in xhci_alloc_interrupter(). Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240429140245.3955523-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 22 +++++++++++----------- drivers/usb/host/xhci.h | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 3ab547a6e4ce..489f54cf9a8a 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2275,24 +2275,24 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) } static struct xhci_interrupter * -xhci_alloc_interrupter(struct xhci_hcd *xhci, int segs, gfp_t flags) +xhci_alloc_interrupter(struct xhci_hcd *xhci, unsigned int segs, gfp_t flags) { struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_interrupter *ir; - unsigned int num_segs = segs; + unsigned int max_segs; int ret; + if (!segs) + segs = ERST_DEFAULT_SEGS; + + max_segs = BIT(HCS_ERST_MAX(xhci->hcs_params2)); + segs = min(segs, max_segs); + ir = kzalloc_node(sizeof(*ir), flags, dev_to_node(dev)); if (!ir) return NULL; - /* number of ring segments should be greater than 0 */ - if (segs <= 0) - num_segs = min_t(unsigned int, 1 << HCS_ERST_MAX(xhci->hcs_params2), - ERST_MAX_SEGS); - - ir->event_ring = xhci_ring_alloc(xhci, num_segs, 1, TYPE_EVENT, 0, - flags); + ir->event_ring = xhci_ring_alloc(xhci, segs, 1, TYPE_EVENT, 0, flags); if (!ir->event_ring) { xhci_warn(xhci, "Failed to allocate interrupter event ring\n"); kfree(ir); @@ -2353,7 +2353,7 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, } struct xhci_interrupter * -xhci_create_secondary_interrupter(struct usb_hcd *hcd, int num_seg) +xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct xhci_interrupter *ir; @@ -2363,7 +2363,7 @@ xhci_create_secondary_interrupter(struct usb_hcd *hcd, int num_seg) if (!xhci->interrupters || xhci->max_interrupters <= 1) return NULL; - ir = xhci_alloc_interrupter(xhci, num_seg, GFP_KERNEL); + ir = xhci_alloc_interrupter(xhci, segs, GFP_KERNEL); if (!ir) return NULL; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 9d2cf11cef84..156e43977cdd 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1423,8 +1423,8 @@ struct urb_priv { struct xhci_td td[]; }; -/* Reasonable limit for number of Event Ring segments (spec allows 32k) */ -#define ERST_MAX_SEGS 2 +/* Number of Event Ring segments to allocate, when amount is not specified. (spec allows 32k) */ +#define ERST_DEFAULT_SEGS 2 /* Poll every 60 seconds */ #define POLL_TIMEOUT 60 /* Stop endpoint command timeout (secs) for URB cancellation watchdog timer */ @@ -1867,7 +1867,7 @@ struct xhci_container_ctx *xhci_alloc_container_ctx(struct xhci_hcd *xhci, void xhci_free_container_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx); struct xhci_interrupter * -xhci_create_secondary_interrupter(struct usb_hcd *hcd, int num_seg); +xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs); void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrupter *ir); From ebb7195dac403f7c77ac860206e12de789a3eb5a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 5 Sep 2024 17:33:00 +0300 Subject: [PATCH 117/129] xhci: support setting interrupt moderation IMOD for secondary interrupters [ Upstream commit 9c0c11bb87b09a8b7cdc21ca1090e7b36abe9d09 ] Allow creators of seconday interrupters to specify the interrupt moderation interval value in nanoseconds when creating the interrupter. If not sure what value to use then use the xhci driver default xhci->imod_interval Suggested-by: Wesley Cheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240905143300.1959279-13-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: bea5892d0ed2 ("xhci: Limit time spent with xHC interrupts disabled during bus resume") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-mem.c | 8 +++++++- drivers/usb/host/xhci.c | 4 ++-- drivers/usb/host/xhci.h | 5 ++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 489f54cf9a8a..2c44855be75e 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2353,7 +2353,8 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, } struct xhci_interrupter * -xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs) +xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs, + u32 imod_interval) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct xhci_interrupter *ir; @@ -2386,6 +2387,11 @@ xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs) return NULL; } + err = xhci_set_interrupter_moderation(ir, imod_interval); + if (err) + xhci_warn(xhci, "Failed to set interrupter %d moderation to %uns\n", + i, imod_interval); + xhci_dbg(xhci, "Add secondary interrupter %d, max interrupters %d\n", i, xhci->max_interrupters); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index d6a0c79e5fad..0af298c5af65 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -324,8 +324,8 @@ static int xhci_disable_interrupter(struct xhci_interrupter *ir) } /* interrupt moderation interval imod_interval in nanoseconds */ -static int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, - u32 imod_interval) +int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, + u32 imod_interval) { u32 imod; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 156e43977cdd..9d05a21392bb 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1867,7 +1867,8 @@ struct xhci_container_ctx *xhci_alloc_container_ctx(struct xhci_hcd *xhci, void xhci_free_container_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx); struct xhci_interrupter * -xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs); +xhci_create_secondary_interrupter(struct usb_hcd *hcd, unsigned int segs, + u32 imod_interval); void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrupter *ir); @@ -1905,6 +1906,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); +int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, + u32 imod_interval); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); From baa3ebbad80ffda219cb1dd642994bedb7dab1b4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 10 Apr 2025 18:18:27 +0300 Subject: [PATCH 118/129] xhci: Limit time spent with xHC interrupts disabled during bus resume [ Upstream commit bea5892d0ed274e03655223d1977cf59f9aff2f2 ] Current xhci bus resume implementation prevents xHC host from generating interrupts during high-speed USB 2 and super-speed USB 3 bus resume. Only reason to disable interrupts during bus resume would be to prevent the interrupt handler from interfering with the resume process of USB 2 ports. Host initiated resume of USB 2 ports is done in two stages. The xhci driver first transitions the port from 'U3' to 'Resume' state, then wait in Resume for 20ms, and finally moves port to U0 state. xhci driver can't prevent interrupts by keeping the xhci spinlock due to this 20ms sleep. Limit interrupt disabling to the USB 2 port resume case only. resuming USB 2 ports in bus resume is only done in special cases where USB 2 ports had to be forced to suspend during bus suspend. The current way of preventing interrupts by clearing the 'Interrupt Enable' (INTE) bit in USBCMD register won't prevent the Interrupter registers 'Interrupt Pending' (IP), 'Event Handler Busy' (EHB) and USBSTS register Event Interrupt (EINT) bits from being set. New interrupts can't be issued before those bits are properly clered. Disable interrupts by clearing the interrupter register 'Interrupt Enable' (IE) bit instead. This way IP, EHB and INTE won't be set before IE is enabled again and a new interrupt is triggered. Reported-by: Devyn Liu Closes: https://lore.kernel.org/linux-usb/b1a9e2d51b4d4ff7a304f77c5be8164e@huawei.com/ Cc: stable@vger.kernel.org Tested-by: Devyn Liu Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250410151828.2868740-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-hub.c | 30 ++++++++++++++++-------------- drivers/usb/host/xhci.c | 4 ++-- drivers/usb/host/xhci.h | 2 ++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0df5d807a77e..a2b6a922077e 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1880,9 +1880,10 @@ int xhci_bus_resume(struct usb_hcd *hcd) int slot_id; int sret; u32 next_state; - u32 temp, portsc; + u32 portsc; struct xhci_hub *rhub; struct xhci_port **ports; + bool disabled_irq = false; rhub = xhci_get_rhub(hcd); ports = rhub->ports; @@ -1898,17 +1899,20 @@ int xhci_bus_resume(struct usb_hcd *hcd) return -ESHUTDOWN; } - /* delay the irqs */ - temp = readl(&xhci->op_regs->command); - temp &= ~CMD_EIE; - writel(temp, &xhci->op_regs->command); - /* bus specific resume for ports we suspended at bus_suspend */ - if (hcd->speed >= HCD_USB3) + if (hcd->speed >= HCD_USB3) { next_state = XDEV_U0; - else + } else { next_state = XDEV_RESUME; - + if (bus_state->bus_suspended) { + /* + * prevent port event interrupts from interfering + * with usb2 port resume process + */ + xhci_disable_interrupter(xhci->interrupters[0]); + disabled_irq = true; + } + } port_index = max_ports; while (port_index--) { portsc = readl(ports[port_index]->addr); @@ -1977,11 +1981,9 @@ int xhci_bus_resume(struct usb_hcd *hcd) (void) readl(&xhci->op_regs->command); bus_state->next_statechange = jiffies + msecs_to_jiffies(5); - /* re-enable irqs */ - temp = readl(&xhci->op_regs->command); - temp |= CMD_EIE; - writel(temp, &xhci->op_regs->command); - temp = readl(&xhci->op_regs->command); + /* re-enable interrupter */ + if (disabled_irq) + xhci_enable_interrupter(xhci->interrupters[0]); spin_unlock_irqrestore(&xhci->lock, flags); return 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0af298c5af65..ce38cd2435c8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -297,7 +297,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) xhci_info(xhci, "Fault detected\n"); } -static int xhci_enable_interrupter(struct xhci_interrupter *ir) +int xhci_enable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -310,7 +310,7 @@ static int xhci_enable_interrupter(struct xhci_interrupter *ir) return 0; } -static int xhci_disable_interrupter(struct xhci_interrupter *ir) +int xhci_disable_interrupter(struct xhci_interrupter *ir) { u32 iman; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 9d05a21392bb..74bdd035d756 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1908,6 +1908,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct usb_tt *tt, gfp_t mem_flags); int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, u32 imod_interval); +int xhci_enable_interrupter(struct xhci_interrupter *ir); +int xhci_disable_interrupter(struct xhci_interrupter *ir); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); From 0ecbb45e30abd0b1e59012fa3f3d5781140248b3 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:27 -0800 Subject: [PATCH 119/129] kernel: param: rename locate_module_kobject [ Upstream commit bbc9462f0cb0c8917a4908e856731708f0cee910 ] The locate_module_kobject() function looks up an existing module_kobject for a given module name. If it cannot find the corresponding module_kobject, it creates one for the given name. This commit renames locate_module_kobject() to lookup_or_create_module_kobject() to better describe its operations. This doesn't change anything functionality wise. Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-2-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu Stable-dep-of: f95bbfe18512 ("drivers: base: handle module_kobject creation") Signed-off-by: Sasha Levin --- kernel/params.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 2d4a0564697e..8d48a6bfe68d 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -759,7 +759,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init locate_module_kobject(const char *name) +static struct module_kobject * __init lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; @@ -801,7 +801,7 @@ static void __init kernel_add_sysfs_param(const char *name, struct module_kobject *mk; int err; - mk = locate_module_kobject(name); + mk = lookup_or_create_module_kobject(name); if (!mk) return; @@ -872,7 +872,7 @@ static void __init version_sysfs_builtin(void) int err; for (vattr = __start___modver; vattr < __stop___modver; vattr++) { - mk = locate_module_kobject(vattr->module_name); + mk = lookup_or_create_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); WARN_ON_ONCE(err); From ace531f2fea11dd56fdfa11eb948cbf689e6339e Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:29 -0800 Subject: [PATCH 120/129] kernel: globalize lookup_or_create_module_kobject() [ Upstream commit 7c76c813cfc42a7376378a0c4b7250db2eebab81 ] lookup_or_create_module_kobject() is marked as static and __init, to make it global drop static keyword. Since this function can be called from non-init code, use __modinit instead of __init, __modinit marker will make it __init if CONFIG_MODULES is not defined. Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-4-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu Stable-dep-of: f95bbfe18512 ("drivers: base: handle module_kobject creation") Signed-off-by: Sasha Levin --- include/linux/module.h | 2 ++ kernel/params.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index a98e188cf37b..f2a8624eef1e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) diff --git a/kernel/params.c b/kernel/params.c index 8d48a6bfe68d..c7aed3c51cd5 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -759,7 +759,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init lookup_or_create_module_kobject(const char *name) +struct module_kobject __modinit * lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; From 8f2451ebaf5b8adffdbdf2ee42e4adf94d213aec Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:30 -0800 Subject: [PATCH 121/129] drivers: base: handle module_kobject creation [ Upstream commit f95bbfe18512c5c018720468959edac056a17196 ] module_add_driver() relies on module_kset list for /sys/module//drivers directory creation. Since, commit 96a1a2412acba ("kernel/params.c: defer most of param_sysfs_init() to late_initcall time") drivers which are initialized from subsys_initcall() or any other higher precedence initcall couldn't find the related kobject entry in the module_kset list because module_kset is not fully populated by the time module_add_driver() refers it. As a consequence, module_add_driver() returns early without calling make_driver_name(). Therefore, /sys/module//drivers is never created. Fix this issue by letting module_add_driver() handle module_kobject creation itself. Fixes: 96a1a2412acb ("kernel/params.c: defer most of param_sysfs_init() to late_initcall time") Cc: stable@vger.kernel.org # requires all other patches from the series Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250227184930.34163-5-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu Signed-off-by: Sasha Levin --- drivers/base/module.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index a33663d92256..955582b34e54 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -42,16 +42,13 @@ int module_add_driver(struct module *mod, struct device_driver *drv) if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } From 3dc33f145a8af9c7a3fb98259fa9dcc105dbe1b4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Aug 2024 20:31:15 -0300 Subject: [PATCH 122/129] iommu/arm-smmu-v3: Use the new rb tree helpers [ Upstream commit a2bb820e862d61f9ca1499e500915f9f505a2655 ] Since v5.12 the rbtree has gained some simplifying helpers aimed at making rb tree users write less convoluted boiler plate code. Instead the caller provides a single comparison function and the helpers generate the prior open-coded stuff. Update smmu->streams to use rb_find_add() and rb_find(). Tested-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v3-9fef8cdc2ff6+150d1-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Stable-dep-of: b00d24997a11 ("iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids") Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 68 ++++++++++----------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6cecbac0e6ba..2cab4798e7a0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1443,26 +1443,37 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } +static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs) +{ + struct arm_smmu_stream *stream_rhs = + rb_entry(rhs, struct arm_smmu_stream, node); + const u32 *sid_lhs = lhs; + + if (*sid_lhs < stream_rhs->id) + return -1; + if (*sid_lhs > stream_rhs->id) + return 1; + return 0; +} + +static int arm_smmu_streams_cmp_node(struct rb_node *lhs, + const struct rb_node *rhs) +{ + return arm_smmu_streams_cmp_key( + &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs); +} + static struct arm_smmu_master * arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) { struct rb_node *node; - struct arm_smmu_stream *stream; lockdep_assert_held(&smmu->streams_mutex); - node = smmu->streams.rb_node; - while (node) { - stream = rb_entry(node, struct arm_smmu_stream, node); - if (stream->id < sid) - node = node->rb_right; - else if (stream->id > sid) - node = node->rb_left; - else - return stream->master; - } - - return NULL; + node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key); + if (!node) + return NULL; + return rb_entry(node, struct arm_smmu_stream, node)->master; } /* IRQ and event handlers */ @@ -2575,8 +2586,6 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, { int i; int ret = 0; - struct arm_smmu_stream *new_stream, *cur_stream; - struct rb_node **new_node, *parent_node = NULL; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams), @@ -2587,9 +2596,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { + struct arm_smmu_stream *new_stream = &master->streams[i]; u32 sid = fwspec->ids[i]; - new_stream = &master->streams[i]; new_stream->id = sid; new_stream->master = master; @@ -2598,28 +2607,13 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - new_node = &(smmu->streams.rb_node); - while (*new_node) { - cur_stream = rb_entry(*new_node, struct arm_smmu_stream, - node); - parent_node = *new_node; - if (cur_stream->id > new_stream->id) { - new_node = &((*new_node)->rb_left); - } else if (cur_stream->id < new_stream->id) { - new_node = &((*new_node)->rb_right); - } else { - dev_warn(master->dev, - "stream %u already in tree\n", - cur_stream->id); - ret = -EINVAL; - break; - } - } - if (ret) + if (rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node)) { + dev_warn(master->dev, "stream %u already in tree\n", + sid); + ret = -EINVAL; break; - - rb_link_node(&new_stream->node, parent_node, new_node); - rb_insert_color(&new_stream->node, &smmu->streams); + } } if (ret) { From 4306dbd7676e41032813620e49cd821a25559852 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 15 Apr 2025 11:56:20 -0700 Subject: [PATCH 123/129] iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids [ Upstream commit b00d24997a11c10d3e420614f0873b83ce358a34 ] ASPEED VGA card has two built-in devices: 0008:06:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge (rev 06) 0008:07:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED Graphics Family (rev 52) Its toplogy looks like this: +-[0008:00]---00.0-[01-09]--+-00.0-[02-09]--+-00.0-[03]----00.0 Sandisk Corp Device 5017 | +-01.0-[04]-- | +-02.0-[05]----00.0 NVIDIA Corporation Device | +-03.0-[06-07]----00.0-[07]----00.0 ASPEED Technology, Inc. ASPEED Graphics Family | +-04.0-[08]----00.0 Renesas Technology Corp. uPD720201 USB 3.0 Host Controller | \-05.0-[09]----00.0 Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller \-00.1 PMC-Sierra Inc. Device 4028 The IORT logic populaties two identical IDs into the fwspec->ids array via DMA aliasing in iort_pci_iommu_init() called by pci_for_each_dma_alias(). Though the SMMU driver had been able to handle this situation since commit 563b5cbe334e ("iommu/arm-smmu-v3: Cope with duplicated Stream IDs"), that got broken by the later commit cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure"), which ended up with allocating separate streams with the same stuffing. On a kernel prior to v6.15-rc1, there has been an overlooked warning: pci 0008:07:00.0: vgaarb: setting as boot VGA device pci 0008:07:00.0: vgaarb: bridge control possible pci 0008:07:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none pcieport 0008:06:00.0: Adding to iommu group 14 ast 0008:07:00.0: stream 67328 already in tree <===== WARNING ast 0008:07:00.0: enabling device (0002 -> 0003) ast 0008:07:00.0: Using default configuration ast 0008:07:00.0: AST 2600 detected ast 0008:07:00.0: [drm] Using analog VGA ast 0008:07:00.0: [drm] dram MCLK=396 Mhz type=1 bus_width=16 [drm] Initialized ast 0.1.0 for 0008:07:00.0 on minor 0 ast 0008:07:00.0: [drm] fb0: astdrmfb frame buffer device With v6.15-rc, since the commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path"), the error returned with the warning is moved to the SMMU device probe flow: arm_smmu_probe_device+0x15c/0x4c0 __iommu_probe_device+0x150/0x4f8 probe_iommu_group+0x44/0x80 bus_for_each_dev+0x7c/0x100 bus_iommu_probe+0x48/0x1a8 iommu_device_register+0xb8/0x178 arm_smmu_device_probe+0x1350/0x1db0 which then fails the entire SMMU driver probe: pci 0008:06:00.0: Adding to iommu group 21 pci 0008:07:00.0: stream 67328 already in tree arm-smmu-v3 arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3 arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 Since SMMU driver had been already expecting a potential duplicated Stream ID in arm_smmu_install_ste_for_dev(), change the arm_smmu_insert_master() routine to ignore a duplicated ID from the fwspec->sids array as well. Note: this has been failing the iommu_device_probe() since 2021, although a recent iommu commit in v6.15-rc1 that moves iommu_device_probe() started to fail the SMMU driver probe. Since nobody has cared about DMA Alias support, leave that as it was but fix the fundamental iommu_device_probe() breakage. Fixes: cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250415185620.504299-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2cab4798e7a0..f2260f45728e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2597,6 +2597,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -2607,10 +2608,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "stream %u already in tree from dev %s\n", sid, + dev_name(existing_master->dev)); ret = -EINVAL; break; } From 97a918755a4ca990a372729a4de70d87a31bb93a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:30:01 -0600 Subject: [PATCH 124/129] drm/amd/display: Add scoped mutexes for amdgpu_dm_dhcp [ Upstream commit 6b675ab8efbf2bcee25be29e865455c56e246401 ] [Why] Guards automatically release mutex when it goes out of scope making code easier to follow. [How] Replace all use of mutex_lock()/mutex_unlock() with guard(mutex). Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: be593d9d91c5 ("drm/amd/display: Fix slab-use-after-free in hdcp") Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 37 +++++-------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 2ad9f900a857..4330d37022fa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -172,7 +172,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, struct mod_hdcp_display_adjustment display_adjust; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -209,7 +209,6 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, @@ -220,7 +219,7 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, struct drm_connector_state *conn_state = aconnector->base.state; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and @@ -239,7 +238,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -247,7 +245,7 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; unsigned int conn_index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output); @@ -259,8 +257,6 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde } process_output(hdcp_w); - - mutex_unlock(&hdcp_w->mutex); } void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -277,7 +273,7 @@ static void event_callback(struct work_struct *work) hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue, callback_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->callback_dwork); @@ -285,8 +281,6 @@ static void event_callback(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_property_update(struct work_struct *work) @@ -323,7 +317,7 @@ static void event_property_update(struct work_struct *work) continue; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); if (conn_state->commit) { ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done, @@ -355,7 +349,6 @@ static void event_property_update(struct work_struct *work) drm_hdcp_update_content_protection(connector, DRM_MODE_CONTENT_PROTECTION_DESIRED); } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } } @@ -368,7 +361,7 @@ static void event_property_validate(struct work_struct *work) struct amdgpu_dm_connector *aconnector; unsigned int conn_index; - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { @@ -408,8 +401,6 @@ static void event_property_validate(struct work_struct *work) schedule_work(&hdcp_work->property_update_work); } } - - mutex_unlock(&hdcp_work->mutex); } static void event_watchdog_timer(struct work_struct *work) @@ -420,7 +411,7 @@ static void event_watchdog_timer(struct work_struct *work) struct hdcp_workqueue, watchdog_timer_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->watchdog_timer_dwork); @@ -429,8 +420,6 @@ static void event_watchdog_timer(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_cpirq(struct work_struct *work) @@ -439,13 +428,11 @@ static void event_cpirq(struct work_struct *work) hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) @@ -479,7 +466,7 @@ static bool enable_assr(void *handle, struct dc_link *link) dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; - mutex_lock(&psp->dtm_context.mutex); + guard(mutex)(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE; @@ -494,8 +481,6 @@ static bool enable_assr(void *handle, struct dc_link *link) res = false; } - mutex_unlock(&psp->dtm_context.mutex); - return res; } @@ -557,13 +542,11 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); - } /** From bbc66abcd297be67e3d835276e21e6fdc65205a6 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 17 Apr 2025 16:50:05 -0500 Subject: [PATCH 125/129] drm/amd/display: Fix slab-use-after-free in hdcp [ Upstream commit be593d9d91c5a3a363d456b9aceb71029aeb3f1d ] The HDCP code in amdgpu_dm_hdcp.c copies pointers to amdgpu_dm_connector objects without incrementing the kref reference counts. When using a USB-C dock, and the dock is unplugged, the corresponding amdgpu_dm_connector objects are freed, creating dangling pointers in the HDCP code. When the dock is plugged back, the dangling pointers are dereferenced, resulting in a slab-use-after-free: [ 66.775837] BUG: KASAN: slab-use-after-free in event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776171] Read of size 4 at addr ffff888127804120 by task kworker/0:1/10 [ 66.776179] CPU: 0 UID: 0 PID: 10 Comm: kworker/0:1 Not tainted 6.14.0-rc7-00180-g54505f727a38-dirty #233 [ 66.776183] Hardware name: HP HP Pavilion Aero Laptop 13-be0xxx/8916, BIOS F.17 12/18/2024 [ 66.776186] Workqueue: events event_property_validate [amdgpu] [ 66.776494] Call Trace: [ 66.776496] [ 66.776497] dump_stack_lvl+0x70/0xa0 [ 66.776504] print_report+0x175/0x555 [ 66.776507] ? __virt_addr_valid+0x243/0x450 [ 66.776510] ? kasan_complete_mode_report_info+0x66/0x1c0 [ 66.776515] kasan_report+0xeb/0x1c0 [ 66.776518] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776819] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777121] __asan_report_load4_noabort+0x14/0x20 [ 66.777124] event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777342] ? __lock_acquire+0x6b40/0x6b40 [ 66.777347] ? enable_assr+0x250/0x250 [amdgpu] [ 66.777571] process_one_work+0x86b/0x1510 [ 66.777575] ? pwq_dec_nr_in_flight+0xcf0/0xcf0 [ 66.777578] ? assign_work+0x16b/0x280 [ 66.777580] ? lock_is_held_type+0xa3/0x130 [ 66.777583] worker_thread+0x5c0/0xfa0 [ 66.777587] ? process_one_work+0x1510/0x1510 [ 66.777588] kthread+0x3a2/0x840 [ 66.777591] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777594] ? trace_hardirqs_on+0x4f/0x60 [ 66.777597] ? _raw_spin_unlock_irq+0x27/0x60 [ 66.777599] ? calculate_sigpending+0x77/0xa0 [ 66.777602] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777605] ret_from_fork+0x40/0x90 [ 66.777607] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777609] ret_from_fork_asm+0x11/0x20 [ 66.777614] [ 66.777643] Allocated by task 10: [ 66.777646] kasan_save_stack+0x39/0x60 [ 66.777649] kasan_save_track+0x14/0x40 [ 66.777652] kasan_save_alloc_info+0x37/0x50 [ 66.777655] __kasan_kmalloc+0xbb/0xc0 [ 66.777658] __kmalloc_cache_noprof+0x1c8/0x4b0 [ 66.777661] dm_dp_add_mst_connector+0xdd/0x5c0 [amdgpu] [ 66.777880] drm_dp_mst_port_add_connector+0x47e/0x770 [drm_display_helper] [ 66.777892] drm_dp_send_link_address+0x1554/0x2bf0 [drm_display_helper] [ 66.777901] drm_dp_check_and_send_link_address+0x187/0x1f0 [drm_display_helper] [ 66.777909] drm_dp_mst_link_probe_work+0x2b8/0x410 [drm_display_helper] [ 66.777917] process_one_work+0x86b/0x1510 [ 66.777919] worker_thread+0x5c0/0xfa0 [ 66.777922] kthread+0x3a2/0x840 [ 66.777925] ret_from_fork+0x40/0x90 [ 66.777927] ret_from_fork_asm+0x11/0x20 [ 66.777932] Freed by task 1713: [ 66.777935] kasan_save_stack+0x39/0x60 [ 66.777938] kasan_save_track+0x14/0x40 [ 66.777940] kasan_save_free_info+0x3b/0x60 [ 66.777944] __kasan_slab_free+0x52/0x70 [ 66.777946] kfree+0x13f/0x4b0 [ 66.777949] dm_dp_mst_connector_destroy+0xfa/0x150 [amdgpu] [ 66.778179] drm_connector_free+0x7d/0xb0 [ 66.778184] drm_mode_object_put.part.0+0xee/0x160 [ 66.778188] drm_mode_object_put+0x37/0x50 [ 66.778191] drm_atomic_state_default_clear+0x220/0xd60 [ 66.778194] __drm_atomic_state_free+0x16e/0x2a0 [ 66.778197] drm_mode_atomic_ioctl+0x15ed/0x2ba0 [ 66.778200] drm_ioctl_kernel+0x17a/0x310 [ 66.778203] drm_ioctl+0x584/0xd10 [ 66.778206] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] [ 66.778375] __x64_sys_ioctl+0x139/0x1a0 [ 66.778378] x64_sys_call+0xee7/0xfb0 [ 66.778381] do_syscall_64+0x87/0x140 [ 66.778385] entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fix this by properly incrementing and decrementing the reference counts when making and deleting copies of the amdgpu_dm_connector pointers. (Mario: rebase on current code and update fixes tag) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Signed-off-by: Chris Bainbridge Fixes: da3fd7ac0bcf3 ("drm/amd/display: Update CP property based on HW query") Reviewed-by: Alex Hung Link: https://lore.kernel.org/r/20250417215005.37964-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit d4673f3c3b3dcb74e36e53cdfc880baa7a87b330) Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 4330d37022fa..a048022d9865 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -489,6 +498,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -545,7 +555,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } From 3efb29f6a78d4746f958c1ab6cd7981c5762f03b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Aug 2024 15:44:07 +0300 Subject: [PATCH 126/129] usb: xhci: Check for xhci->interrupters being allocated in xhci_mem_clearup() commit dcdb52d948f3a17ccd3fce757d9bd981d7c32039 upstream. If xhci_mem_init() fails, it calls into xhci_mem_cleanup() to mop up the damage. If it fails early enough, before xhci->interrupters is allocated but after xhci->max_interrupters has been set, which happens in most (all?) cases, things get uglier, as xhci_mem_cleanup() unconditionally derefences xhci->interrupters. With prejudice. Gate the interrupt freeing loop with a check on xhci->interrupters being non-NULL. Found while debugging a DMA allocation issue that led the XHCI driver on this exact path. Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Cc: Mathias Nyman Cc: Wesley Cheng Cc: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org # 6.8+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240809124408.505786-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 2c44855be75e..ff732ddc6e8c 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1882,7 +1882,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); From fe465003bb63df8d5ea15fc17ff6aa9a5fe7daeb Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:34 +0200 Subject: [PATCH 127/129] xhci: fix possible null pointer dereference at secondary interrupter removal commit a54a594d72f25b08f39d743880a76721fba9ae77 upstream. Don't try to remove a secondary interrupter that is known to be invalid. Also check if the interrupter is valid inside the spinlock that protects the array of interrupters. Found by smatch static checker Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/ffaa0a1b-5984-4a1f-bfd3-9184630a97b9@moroto.mountain/ Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index ff732ddc6e8c..22cca89efbfd 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1855,15 +1855,15 @@ void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrup struct xhci_hcd *xhci = hcd_to_xhci(hcd); unsigned int intr_num; - /* interrupter 0 is primary interrupter, don't touch it */ - if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) - xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n"); - - /* fixme, should we check xhci->interrupter[intr_num] == ir */ - /* fixme locking */ - spin_lock_irq(&xhci->lock); + /* interrupter 0 is primary interrupter, don't touch it */ + if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) { + xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n"); + spin_unlock_irq(&xhci->lock); + return; + } + intr_num = ir->intr_num; xhci_remove_interrupter(xhci, ir); From ed3248a403740a623c73afd95f88cc37e0cd3ad2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 6 May 2025 11:31:50 +0000 Subject: [PATCH 128/129] dm: fix copying after src array boundaries commit f1aff4bc199cb92c055668caed65505e3b4d2656 upstream. The blammed commit copied to argv the size of the reallocated argv, instead of the size of the old_argv, thus reading and copying from past the old_argv allocated memory. Following BUG_ON was hit: [ 3.038929][ T1] kernel BUG at lib/string_helpers.c:1040! [ 3.039147][ T1] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... [ 3.056489][ T1] Call trace: [ 3.056591][ T1] __fortify_panic+0x10/0x18 (P) [ 3.056773][ T1] dm_split_args+0x20c/0x210 [ 3.056942][ T1] dm_table_add_target+0x13c/0x360 [ 3.057132][ T1] table_load+0x110/0x3ac [ 3.057292][ T1] dm_ctl_ioctl+0x424/0x56c [ 3.057457][ T1] __arm64_sys_ioctl+0xa8/0xec [ 3.057634][ T1] invoke_syscall+0x58/0x10c [ 3.057804][ T1] el0_svc_common+0xa8/0xdc [ 3.057970][ T1] do_el0_svc+0x1c/0x28 [ 3.058123][ T1] el0_svc+0x50/0xac [ 3.058266][ T1] el0t_64_sync_handler+0x60/0xc4 [ 3.058452][ T1] el0t_64_sync+0x1b0/0x1b4 [ 3.058620][ T1] Code: f800865e a9bf7bfd 910003fd 941f48aa (d4210000) [ 3.058897][ T1] ---[ end trace 0000000000000000 ]--- [ 3.059083][ T1] Kernel panic - not syncing: Oops - BUG: Fatal exception Fix it by copying the size of src, and not the size of dst, as it was. Fixes: 5a2a6c428190 ("dm: always update the array size in realloc_argv on success") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c32ae18a8ca3..319bd10548e9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -501,9 +501,9 @@ static char **realloc_argv(unsigned int *size, char **old_argv) } argv = kmalloc_array(new_size, sizeof(*argv), gfp); if (argv) { - *size = new_size; if (old_argv) memcpy(argv, old_argv, *size * sizeof(*argv)); + *size = new_size; } kfree(old_argv); From 9c2dd8954dad0430e83ee55b985ba55070e50cf7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 May 2025 09:44:08 +0200 Subject: [PATCH 129/129] Linux 6.6.90 Link: https://lore.kernel.org/r/20250507183813.500572371@linuxfoundation.org Tested-by: Nam Cao Tested-by: Miguel Ojeda Tested-by: Shuah Khan Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20250508112618.875786933@linuxfoundation.org Tested-by: Peter Schneider Tested-by: Ryan Matthews Tested-by: Miguel Ojeda Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 23e90df5785c..587a1586e76d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 89 +SUBLEVEL = 90 EXTRAVERSION = NAME = Pinguïn Aangedreven