From 3cdbf62209b86f262584d9864882b8df728727aa Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Thu, 24 Apr 2025 16:00:28 +0200 Subject: [PATCH 01/97] Revert "rndis_host: Flag RNDIS modems as WWAN devices" commit 765f253e28909f161b0211f85cf0431cfee7d6df upstream. This reverts commit 67d1a8956d2d62fe6b4c13ebabb57806098511d8. Since this commit has been proven to be problematic for the setup of USB-tethered ethernet connections and the related breakage is very noticeable for users it should be reverted until a fixed version of the change can be rolled out. Closes: https://lore.kernel.org/all/e0df2d85-1296-4317-b717-bd757e3ab928@heusel.eu/ Link: https://chaos.social/@gromit/114377862699921553 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220002 Link: https://bugs.gentoo.org/953555 Link: https://bbs.archlinux.org/viewtopic.php?id=304892 Cc: stable@vger.kernel.org Acked-by: Lubomir Rintel Signed-off-by: Christian Heusel Link: https://patch.msgid.link/20250424-usb-tethering-fix-v1-1-b65cf97c740e@heusel.eu Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/rndis_host.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bb0bf1415872..7b3739b29c8f 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,16 +630,6 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; -static const struct driver_info wwan_rndis_info = { - .description = "Mobile Broadband RNDIS device", - .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, - .bind = rndis_bind, - .unbind = rndis_unbind, - .status = rndis_status, - .rx_fixup = rndis_rx_fixup, - .tx_fixup = rndis_tx_fixup, -}; - /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -676,11 +666,9 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and - * Telit FN990A (RNDIS) - */ + /* Novatel Verizon USB730L */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long)&wwan_rndis_info, + .driver_info = (unsigned long) &rndis_info, }, { }, // END }; From 6649a1dbcd2c5f6c2627044ded73550dddb45282 Mon Sep 17 00:00:00 2001 From: Joachim Priesner Date: Mon, 28 Apr 2025 07:36:06 +0200 Subject: [PATCH 02/97] ALSA: usb-audio: Add second USB ID for Jabra Evolve 65 headset commit 1149719442d28c96dc63cad432b5a6db7c300e1a upstream. There seem to be multiple USB device IDs used for these; the one I have reports as 0b0e:030c when powered on. (When powered off, it reports as 0b0e:0311.) Signed-off-by: Joachim Priesner Cc: Link: https://patch.msgid.link/20250428053606.9237-1-joachim.priesner@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/format.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 3b3a5ea6fcbf..f33d25a4e4cc 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -263,7 +263,8 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof } /* Jabra Evolve 65 headset */ - if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + if (chip->usb_id == USB_ID(0x0b0e, 0x030b) || + chip->usb_id == USB_ID(0x0b0e, 0x030c)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); From 47ca11836c35c5698088fd87f7fb4b0ffa217e17 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 15 Apr 2025 14:19:00 +0200 Subject: [PATCH 03/97] drm/nouveau: Fix WARN_ON in nouveau_fence_context_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bbe5679f30d7690a9b6838a583b9690ea73fe0e9 upstream. Nouveau is mostly designed in a way that it's expected that fences only ever get signaled through nouveau_fence_signal(). However, in at least one other place, nouveau_fence_done(), can signal fences, too. If that happens (race) a signaled fence remains in the pending list for a while, until it gets removed by nouveau_fence_update(). Should nouveau_fence_context_kill() run in the meantime, this would be a bug because the function would attempt to set an error code on an already signaled fence. Have nouveau_fence_context_kill() check for a fence being signaled. Cc: stable@vger.kernel.org # v5.10+ Fixes: ea13e5abf807 ("drm/nouveau: signal pending fences when channel has been killed") Suggested-by: Christian König Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250415121900.55719-3-phasta@kernel.org Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index abcac7db4347..c84f4b9dbb2c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -95,7 +95,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) From 17050592870e44d41f13519ce8e62db443a3fd9e Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:39 -0700 Subject: [PATCH 04/97] EDAC/altera: Test the correct error reg offset commit 4fb7b8fceb0beebbe00712c3daf49ade0386076a upstream. Test correct structure member, ecc_cecnt_offset, before using it. [ bp: Massage commit message. ] Fixes: 73bcc942f427 ("EDAC, altera: Add Arria10 EDAC support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-2-matthew.gerlach@altera.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/altera_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index e7e8e624a436..1cde19ba3a9f 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -98,7 +98,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, From a62780b542a1cedf70ad536bb24806f057e6d1ca Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:40 -0700 Subject: [PATCH 05/97] EDAC/altera: Set DDR and SDMMC interrupt mask before registration commit 6dbe3c5418c4368e824bff6ae4889257dd544892 upstream. Mask DDR and SDMMC in probe function to avoid spurious interrupts before registration. Removed invalid register write to system manager. Fixes: 1166fde93d5b ("EDAC, altera: Add Arria10 ECC memory init functions") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-3-matthew.gerlach@altera.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/altera_edac.c | 7 ++++--- drivers/edac/altera_edac.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 1cde19ba3a9f..f865139e43db 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1015,9 +1015,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -2138,6 +2135,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e..7248d24c4908 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 From b36749bb27073e7cd4723ebe9b8e9b1d2053dedc Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 21 Apr 2025 14:23:41 +0800 Subject: [PATCH 06/97] i2c: imx-lpi2c: Fix clock count when probe defers commit b1852c5de2f2a37dd4462f7837c9e3e678f9e546 upstream. Deferred probe with pm_runtime_put() may delay clock disable, causing incorrect clock usage count. Use pm_runtime_put_sync() to ensure the clock is disabled immediately. Fixes: 13d6eb20fc79 ("i2c: imx-lpi2c: add runtime pm support") Signed-off-by: Clark Wang Signed-off-by: Carlos Song Cc: # v4.16+ Link: https://lore.kernel.org/r/20250421062341.2471922-1-carlos.song@nxp.com Signed-off-by: Andi Shyti Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index ff12018bc206..45d97d611ba1 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -616,9 +616,9 @@ static int lpi2c_imx_probe(struct platform_device *pdev) return 0; rpm_disable: - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } From 446289b8b36b2ee98dabf6388acbddcc33ed41be Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 May 2025 11:47:47 +0100 Subject: [PATCH 07/97] arm64: errata: Add missing sentinels to Spectre-BHB MIDR arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fee4d171451c1ad9e8aaf65fc0ab7d143a33bd72 upstream. Commit a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") added some additional CPUs to the Spectre-BHB workaround, including some new arrays for designs that require new 'k' values for the workaround to be effective. Unfortunately, the new arrays omitted the sentinel entry and so is_midr_in_range_list() will walk off the end when it doesn't find a match. With UBSAN enabled, this leads to a crash during boot when is_midr_in_range_list() is inlined (which was more common prior to c8c2647e69be ("arm64: Make  _midr_in_range_list() an exported function")): | Internal error: aarch64 BRK: 00000000f2000001 [#1] PREEMPT SMP | pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : spectre_bhb_loop_affected+0x28/0x30 | lr : is_spectre_bhb_affected+0x170/0x190 | [...] | Call trace: | spectre_bhb_loop_affected+0x28/0x30 | update_cpu_capabilities+0xc0/0x184 | init_cpu_features+0x188/0x1a4 | cpuinfo_store_boot_cpu+0x4c/0x60 | smp_prepare_boot_cpu+0x38/0x54 | start_kernel+0x8c/0x478 | __primary_switched+0xc8/0xd4 | Code: 6b09011f 54000061 52801080 d65f03c0 (d4200020) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: aarch64 BRK: Fatal exception Add the missing sentinel entries. Cc: Lee Jones Cc: James Morse Cc: Doug Anderson Cc: Shameer Kolothum Cc: Reported-by: Greg Kroah-Hartman Fixes: a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") Signed-off-by: Will Deacon Reviewed-by: Lee Jones Reviewed-by: Douglas Anderson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250501104747.28431-1-will@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/proton-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 93a00cd42edd..3d12fc5af87b 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -891,10 +891,12 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), From 6c639af49e9e5615a8395981eaf5943fb40acd6f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 3 May 2025 18:24:01 +0200 Subject: [PATCH 08/97] parisc: Fix double SIGFPE crash commit de3629baf5a33af1919dec7136d643b0662e85ef upstream. Camm noticed that on parisc a SIGFPE exception will crash an application with a second SIGFPE in the signal handler. Dave analyzed it, and it happens because glibc uses a double-word floating-point store to atomically update function descriptors. As a result of lazy binding, we hit a floating-point store in fpe_func almost immediately. When the T bit is set, an assist exception trap occurs when when the co-processor encounters *any* floating-point instruction except for a double store of register %fr0. The latter cancels all pending traps. Let's fix this by clearing the Trap (T) bit in the FP status register before returning to the signal handler in userspace. The issue can be reproduced with this test program: root@parisc:~# cat fpe.c static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main() { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW); return printf("%lf\n",1.7976931348623158E308*1.7976931348623158E308); } root@parisc:~# gcc fpe.c -lm root@parisc:~# ./a.out Floating point exception root@parisc:~# strace -f ./a.out execve("./a.out", ["./a.out"], 0xf9ac7034 /* 20 vars */) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 ... rt_sigaction(SIGFPE, {sa_handler=0x1110a, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0x1078f} --- --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0xf8f21237} --- +++ killed by SIGFPE +++ Floating point exception Signed-off-by: Helge Deller Suggested-by: John David Anglin Reported-by: Camm Maguire Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/parisc/math-emu/driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 6ce427b58836..ecd27b48d61f 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -103,9 +103,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; From 160153cf9e4aa875ad086cc094ce34aac8e13d63 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Apr 2025 17:13:55 -0700 Subject: [PATCH 09/97] perf/x86/intel: KVM: Mask PEBS_ENABLE loaded for guest with vCPU's value. commit 58f6217e5d0132a9f14e401e62796916aa055c1b upstream. When generating the MSR_IA32_PEBS_ENABLE value that will be loaded on VM-Entry to a KVM guest, mask the value with the vCPU's desired PEBS_ENABLE value. Consulting only the host kernel's host vs. guest masks results in running the guest with PEBS enabled even when the guest doesn't want to use PEBS. Because KVM uses perf events to proxy the guest virtual PMU, simply looking at exclude_host can't differentiate between events created by host userspace, and events created by KVM on behalf of the guest. Running the guest with PEBS unexpectedly enabled typically manifests as crashes due to a near-infinite stream of #PFs. E.g. if the guest hasn't written MSR_IA32_DS_AREA, the CPU will hit page faults on address '0' when trying to record PEBS events. The issue is most easily reproduced by running `perf kvm top` from before commit 7b100989b4f6 ("perf evlist: Remove __evlist__add_default") (after which, `perf kvm top` effectively stopped using PEBS). The userspace side of perf creates a guest-only PEBS event, which intel_guest_get_msrs() misconstrues a guest-*owned* PEBS event. Arguably, this is a userspace bug, as enabling PEBS on guest-only events simply cannot work, and userspace can kill VMs in many other ways (there is no danger to the host). However, even if this is considered to be bad userspace behavior, there's zero downside to perf/KVM restricting PEBS to guest-owned events. Note, commit 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations") fixed the case where host userspace is profiling KVM *and* userspace, but missed the case where userspace is profiling only KVM. Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Closes: https://lore.kernel.org/all/Z_VUswFkWiTYI0eD@do-x1carbon Reported-by: Seth Forshee Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: "Seth Forshee (DigitalOcean)" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250426001355.1026530-1-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 46677cad3a39..b380fc5ea088 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4177,7 +4177,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { From bdabd4ee04a6e583a363d5ea960263ba00259d1f Mon Sep 17 00:00:00 2001 From: Vishal Badole Date: Thu, 24 Apr 2025 18:32:48 +0530 Subject: [PATCH 10/97] amd-xgbe: Fix to ensure dependent features are toggled with RX checksum offload commit f04dd30f1bef1ed2e74a4050af6e5e5e3869bac3 upstream. According to the XGMAC specification, enabling features such as Layer 3 and Layer 4 Packet Filtering, Split Header and Virtualized Network support automatically selects the IPC Full Checksum Offload Engine on the receive side. When RX checksum offload is disabled, these dependent features must also be disabled to prevent abnormal behavior caused by mismatched feature dependencies. Ensure that toggling RX checksum offload (disabling or enabling) properly disables or enables all dependent features, maintaining consistent and expected behavior in the network device. Cc: stable@vger.kernel.org Fixes: 1a510ccf5869 ("amd-xgbe: Add support for VXLAN offload capabilities") Signed-off-by: Vishal Badole Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250424130248.428865-1-Vishal.Badole@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 9 +++++++-- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 24 +++++++++++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 11 +++++++++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 4 ++++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 230726d7b74f..d41b58fad37b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -373,8 +373,13 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata, } /* Set up the header page info */ - xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, - XGBE_SKB_ALLOC_SIZE); + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + XGBE_SKB_ALLOC_SIZE); + } else { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + pdata->rx_buf_size); + } /* Set up the buffer page info */ xgbe_set_buffer_data(&rdata->rx.buf, &ring->rx_buf_pa, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 4030d619e84f..3cf7943b590c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -320,6 +320,18 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } +static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) + break; + + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); + } +} + static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, unsigned int index, unsigned int val) { @@ -3495,8 +3507,12 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); xgbe_config_tso_mode(pdata); - xgbe_config_sph_mode(pdata); - xgbe_config_rss(pdata); + + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_config_sph_mode(pdata); + xgbe_config_rss(pdata); + } + desc_if->wrapper_tx_desc_init(pdata); desc_if->wrapper_rx_desc_init(pdata); xgbe_enable_dma_interrupts(pdata); @@ -3650,5 +3666,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->disable_vxlan = xgbe_disable_vxlan; hw_if->set_vxlan_id = xgbe_set_vxlan_id; + /* For Split Header*/ + hw_if->enable_sph = xgbe_config_sph_mode; + hw_if->disable_sph = xgbe_disable_sph_mode; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 6b73648b3779..34d45cebefb5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2257,10 +2257,17 @@ static int xgbe_set_features(struct net_device *netdev, if (ret) return ret; - if ((features & NETIF_F_RXCSUM) && !rxcsum) + if ((features & NETIF_F_RXCSUM) && !rxcsum) { + hw_if->enable_sph(pdata); + hw_if->enable_vxlan(pdata); hw_if->enable_rx_csum(pdata); - else if (!(features & NETIF_F_RXCSUM) && rxcsum) + schedule_work(&pdata->restart_work); + } else if (!(features & NETIF_F_RXCSUM) && rxcsum) { + hw_if->disable_sph(pdata); + hw_if->disable_vxlan(pdata); hw_if->disable_rx_csum(pdata); + schedule_work(&pdata->restart_work); + } if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan) hw_if->enable_rx_vlan_stripping(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 7a41367c437d..b17c7d1dc4b0 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -859,6 +859,10 @@ struct xgbe_hw_if { void (*enable_vxlan)(struct xgbe_prv_data *); void (*disable_vxlan)(struct xgbe_prv_data *); void (*set_vxlan_id)(struct xgbe_prv_data *); + + /* For Split Header */ + void (*enable_sph)(struct xgbe_prv_data *pdata); + void (*disable_sph)(struct xgbe_prv_data *pdata); }; /* This structure represents implementation specific routines for an From 45aced97f01d5ab14c8a2a60f6748f18c501c3f5 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 2 May 2025 13:22:28 +0200 Subject: [PATCH 11/97] irqchip/qcom-mpm: Prevent crash when trying to handle non-wake GPIOs commit 38a05c0b87833f5b188ae43b428b1f792df2b384 upstream. On Qualcomm chipsets not all GPIOs are wakeup capable. Those GPIOs do not have a corresponding MPM pin and should not be handled inside the MPM driver. The IRQ domain hierarchy is always applied, so it's required to explicitly disconnect the hierarchy for those. The pinctrl-msm driver marks these with GPIO_NO_WAKE_IRQ. qcom-pdc has a check for this, but irq-qcom-mpm is currently missing the check. This is causing crashes when setting up interrupts for non-wake GPIOs: root@rb1:~# gpiomon -c gpiochip1 10 irq: IRQ159: trimming hierarchy from :soc@0:interrupt-controller@f200000-1 Unable to handle kernel paging request at virtual address ffff8000a1dc3820 Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pc : mpm_set_type+0x80/0xcc lr : mpm_set_type+0x5c/0xcc Call trace: mpm_set_type+0x80/0xcc (P) qcom_mpm_set_type+0x64/0x158 irq_chip_set_type_parent+0x20/0x38 msm_gpio_irq_set_type+0x50/0x530 __irq_set_trigger+0x60/0x184 __setup_irq+0x304/0x6bc request_threaded_irq+0xc8/0x19c edge_detector_setup+0x260/0x364 linereq_create+0x420/0x5a8 gpio_ioctl+0x2d4/0x6c0 Fix this by copying the check for GPIO_NO_WAKE_IRQ from qcom-pdc.c, so that MPM is removed entirely from the hierarchy for non-wake GPIOs. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Alexey Klimov Signed-off-by: Stephan Gerhold Signed-off-by: Thomas Gleixner Tested-by: Alexey Klimov Reviewed-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250502-irq-qcom-mpm-fix-no-wake-v1-1-8a1eafcd28d4@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-qcom-mpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index d30614661eea..775ab17e51f9 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -226,6 +226,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + if (pin == GPIO_NO_WAKE_IRQ) + return irq_domain_disconnect_hierarchy(domain, virq); + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, &qcom_mpm_chip, priv); if (ret) From 3eaa5e363012eba9fa21c8b368a1737768754ed3 Mon Sep 17 00:00:00 2001 From: Ruslan Piasetskyi Date: Wed, 26 Mar 2025 23:06:38 +0100 Subject: [PATCH 12/97] mmc: renesas_sdhi: Fix error handling in renesas_sdhi_probe commit 649b50a82f09fa44c2f7a65618e4584072145ab7 upstream. After moving tmio_mmc_host_probe down, error handling has to be adjusted. Fixes: 74f45de394d9 ("mmc: renesas_sdhi: register irqs before registering controller") Reviewed-by: Ihar Salauyou Signed-off-by: Ruslan Piasetskyi Reviewed-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250326220638.460083-1-ruslan.piasetskyi@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/renesas_sdhi_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 7572c5714b46..d0a15645a0b8 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1109,26 +1109,26 @@ int renesas_sdhi_probe(struct platform_device *pdev, num_irqs = platform_irq_count(pdev); if (num_irqs < 0) { ret = num_irqs; - goto eirq; + goto edisclk; } /* There must be at least one IRQ source */ if (!num_irqs) { ret = -ENXIO; - goto eirq; + goto edisclk; } for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - goto eirq; + goto edisclk; } ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) - goto eirq; + goto edisclk; } ret = tmio_mmc_host_probe(host); @@ -1140,8 +1140,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, return ret; -eirq: - tmio_mmc_host_remove(host); edisclk: renesas_sdhi_clk_disable(host); efree: From 524b70441baba453b193c418e3142bd31059cc1f Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 22 Apr 2025 12:22:02 +0800 Subject: [PATCH 13/97] wifi: brcm80211: fmac: Add error handling for brcmf_usb_dl_writeimage() commit 8e089e7b585d95122c8122d732d1d5ef8f879396 upstream. The function brcmf_usb_dl_writeimage() calls the function brcmf_usb_dl_cmd() but dose not check its return value. The 'state.state' and the 'state.bytes' are uninitialized if the function brcmf_usb_dl_cmd() fails. It is dangerous to use uninitialized variables in the conditions. Add error handling for brcmf_usb_dl_cmd() to jump to error handling path if the brcmf_usb_dl_cmd() fails and the 'state.state' and the 'state.bytes' are uninitialized. Improve the error message to report more detailed error information. Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Wentao Liang Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250422042203.2259-1-vulab@iscas.ac.cn Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 85e18fb9c497..92b0839970c2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -903,14 +903,16 @@ brcmf_usb_dl_writeimage(struct brcmf_usbdev_info *devinfo, u8 *fw, int fwlen) } /* 1) Prepare USB boot loader for runtime image */ - brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + err = brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + if (err) + goto fail; rdlstate = le32_to_cpu(state.state); rdlbytes = le32_to_cpu(state.bytes); /* 2) Check we are in the Waiting state */ if (rdlstate != DL_WAITING) { - brcmf_err("Failed to DL_START\n"); + brcmf_err("Invalid DL state: %u\n", rdlstate); err = -EINVAL; goto fail; } From 12351db6c325ec74160ea176ca0b7b7ae678633f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 22 Apr 2025 21:18:33 +0200 Subject: [PATCH 14/97] dm-integrity: fix a warning on invalid table line commit 0a533c3e4246c29d502a7e0fba0e86d80a906b04 upstream. If we use the 'B' mode and we have an invalit table line, cancel_delayed_work_sync would trigger a warning. This commit avoids the warning. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c1f1f4ad608d..efd0732a8c10 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4637,7 +4637,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); From 2dd94484415dde4da0f5c40ff2809d9ef4a01935 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 15 Apr 2025 00:17:16 -0400 Subject: [PATCH 15/97] dm: always update the array size in realloc_argv on success commit 5a2a6c428190f945c5cbf5791f72dbea83e97f66 upstream. realloc_argv() was only updating the array size if it was called with old_argv already allocated. The first time it was called to create an argv array, it would allocate the array but return the array size as zero. dm_split_args() would think that it couldn't store any arguments in the array and would call realloc_argv() again, causing it to reallocate the initial slots (this time using GPF_KERNEL) and finally return a size. Aside from being wasteful, this could cause deadlocks on targets that need to process messages without starting new IO. Instead, realloc_argv should always update the allocated array size on success. Fixes: a0651926553c ("dm table: don't copy from a NULL pointer in realloc_argv()") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index aabb2435070b..052ef081fcdf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -502,9 +502,10 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { *size = new_size; + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); } kfree(old_argv); From c3f37faa71f5d26dd2144b3f2b14525ec8f5e41f Mon Sep 17 00:00:00 2001 From: Pavel Paklov Date: Tue, 25 Mar 2025 09:22:44 +0000 Subject: [PATCH 16/97] iommu/amd: Fix potential buffer overflow in parse_ivrs_acpihid commit 8dee308e4c01dea48fc104d37f92d5b58c50b96c upstream. There is a string parsing logic error which can lead to an overflow of hid or uid buffers. Comparing ACPIID_LEN against a total string length doesn't take into account the lengths of individual hid and uid buffers so the check is insufficient in some cases. For example if the length of hid string is 4 and the length of the uid string is 260, the length of str will be equal to ACPIID_LEN + 1 but uid string will overflow uid buffer which size is 256. The same applies to the hid string with length 13 and uid string with length 250. Check the length of hid and uid strings separately to prevent buffer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Pavel Paklov Link: https://lore.kernel.org/r/20250325092259.392844-1-Pavel.Paklov@cyberprotect.ru Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c9598c506ff9..bc78e8665551 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3610,6 +3610,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From 810947bfbb56e65488649dc08adb14e228f27947 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Fri, 18 Apr 2025 11:16:42 +0800 Subject: [PATCH 17/97] iommu/vt-d: Apply quirk_iommu_igfx for 8086:0044 (QM57/QS57) commit 2c8a7c66c90832432496616a9a3c07293f1364f3 upstream. On the Lenovo ThinkPad X201, when Intel VT-d is enabled in the BIOS, the kernel boots with errors related to DMAR, the graphical interface appeared quite choppy, and the system resets erratically within a minute after it booted: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:02.0] fault addr 0xb97ff000 [fault reason 0x05] PTE Write access is not set Upon comparing boot logs with VT-d on/off, I found that the Intel Calpella quirk (`quirk_calpella_no_shadow_gtt()') correctly applied the igfx IOMMU disable/quirk correctly: pci 0000:00:00.0: DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics Whereas with VT-d on, it went into the "else" branch, which then triggered the DMAR handling fault above: ... else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); } Now, this is not exactly scientific, but moving 0x0044 to quirk_iommu_igfx seems to have fixed the aforementioned issue. Running a few `git blame' runs on the function, I have found that the quirk was originally introduced as a fix specific to ThinkPad X201: commit 9eecabcb9a92 ("intel-iommu: Abort IOMMU setup for igfx if BIOS gave no shadow GTT space") Which was later revised twice to the "else" branch we saw above: - 2011: commit 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") - 2024: commit ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") I'm uncertain whether further testings on this particular laptops were done in 2011 and (honestly I'm not sure) 2024, but I would be happy to do some distro-specific testing if that's what would be required to verify this patch. P.S., I also see IDs 0x0040, 0x0062, and 0x006a listed under the same `quirk_calpella_no_shadow_gtt()' quirk, but I'm not sure how similar these chipsets are (if they share the same issue with VT-d or even, indeed, if this issue is specific to a bug in the Lenovo BIOS). With regards to 0x0062, it seems to be a Centrino wireless card, but not a chipset? I have also listed a couple (distro and kernel) bug reports below as references (some of them are from 7-8 years ago!), as they seem to be similar issue found on different Westmere/Ironlake, Haswell, and Broadwell hardware setups. Cc: stable@vger.kernel.org Fixes: 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") Fixes: ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") Link: https://groups.google.com/g/qubes-users/c/4NP4goUds2c?pli=1 Link: https://bugs.archlinux.org/task/65362 Link: https://bbs.archlinux.org/viewtopic.php?id=230323 Reported-by: Wenhao Sun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=197029 Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/r/20250415133330.12528-1-jeffbai@aosc.io Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9a1bdfda9a9a..5847ac29d976 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4836,6 +4836,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4913,7 +4916,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); From cf443f314509cff8539c6d73b81e2d55bc061992 Mon Sep 17 00:00:00 2001 From: Shouye Liu Date: Thu, 17 Apr 2025 11:23:21 +0800 Subject: [PATCH 18/97] platform/x86/intel-uncore-freq: Fix missing uncore sysfs during CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8d6955ed76e8a47115f2ea1d9c263ee6f505d737 upstream. In certain situations, the sysfs for uncore may not be present when all CPUs in a package are offlined and then brought back online after boot. This issue can occur if there is an error in adding the sysfs entry due to a memory allocation failure. Retrying to bring the CPUs online will not resolve the issue, as the uncore_cpu_mask is already set for the package before the failure condition occurs. This issue does not occur if the failure happens during module initialization, as the module will fail to load in the event of any error. To address this, ensure that the uncore_cpu_mask is not set until the successful return of uncore_freq_add_entry(). Fixes: dbce412a7733 ("platform/x86/intel-uncore-freq: Split common and enumeration part") Signed-off-by: Shouye Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250417032321.75580-1-shouyeliu@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- .../x86/intel/uncore-frequency/uncore-frequency.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 00ac7e381441..0be3087e21d5 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -121,15 +121,13 @@ static int uncore_event_cpu_online(unsigned int cpu) { struct uncore_data *data; int target; + int ret; /* Check if there is an online cpu in the package for uncore MSR */ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; - /* Use this CPU on this die as a control CPU */ - cpumask_set_cpu(cpu, &uncore_cpu_mask); - data = uncore_get_instance(cpu); if (!data) return 0; @@ -137,7 +135,14 @@ static int uncore_event_cpu_online(unsigned int cpu) data->package_id = topology_physical_package_id(cpu); data->die_id = topology_die_id(cpu); - return uncore_freq_add_entry(data, cpu); + ret = uncore_freq_add_entry(data, cpu); + if (ret) + return ret; + + /* Use this CPU on this die as a control CPU */ + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + return 0; } static int uncore_event_cpu_offline(unsigned int cpu) From e34a33d5d7e87399af0a138bb32f6a3e95dd83d2 Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Sat, 19 Apr 2025 19:59:28 +0100 Subject: [PATCH 19/97] ksmbd: fix use-after-free in kerberos authentication commit e86e9134e1d1c90a960dd57f59ce574d27b9a124 upstream. Setting sess->user = NULL was introduced to fix the dangling pointer created by ksmbd_free_user. However, it is possible another thread could be operating on the session and make use of sess->user after it has been passed to ksmbd_free_user but before sess->user is set to NULL. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/auth.c | 14 +++++++++++++- fs/smb/server/smb2pdu.c | 5 ----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 1dee600e773f..f8a192cc82f2 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -544,7 +544,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, retval = -ENOMEM; goto out; } - sess->user = user; + + if (!sess->user) { + /* First successful authentication */ + sess->user = user; + } else { + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_debug(AUTH, "different user tried to reuse session\n"); + retval = -EPERM; + ksmbd_free_user(user); + goto out; + } + ksmbd_free_user(user); + } memcpy(sess->sess_key, resp->payload, resp->session_key_len); memcpy(out_blob, resp->payload + resp->session_key_len, diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 8c9857b36add..9b1ba4aedbce 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1615,11 +1615,6 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { From 681400579b88ef35e0e14fcff9c8d205ec0c71c4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Apr 2025 16:12:37 +0200 Subject: [PATCH 20/97] cpufreq: Avoid using inconsistent policy->min and policy->max commit 7491cdf46b5cbdf123fc84fbe0a07e9e3d7b7620 upstream. Since cpufreq_driver_resolve_freq() can run in parallel with cpufreq_set_policy() and there is no synchronization between them, the former may access policy->min and policy->max while the latter is updating them and it may see intermediate values of them due to the way the update is carried out. Also the compiler is free to apply any optimizations it wants both to the stores in cpufreq_set_policy() and to the loads in cpufreq_driver_resolve_freq() which may result in additional inconsistencies. To address this, use WRITE_ONCE() when updating policy->min and policy->max in cpufreq_set_policy() and use READ_ONCE() for reading them in cpufreq_driver_resolve_freq(). Moreover, rearrange the update in cpufreq_set_policy() to avoid storing intermediate values in policy->min and policy->max with the help of the observation that their new values are expected to be properly ordered upfront. Also modify cpufreq_driver_resolve_freq() to take the possible reverse ordering of policy->min and policy->max, which may happen depending on the ordering of operations when this function and cpufreq_set_policy() run concurrently, into account by always honoring the max when it turns out to be less than the min (in case it comes from thermal throttling or similar). Fixes: 151717690694 ("cpufreq: Make policy min/max hard requirements") Cc: 5.16+ # 5.16+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Acked-by: Viresh Kumar Link: https://patch.msgid.link/5907080.DvuYhMxLoT@rjwysocki.net Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 779ec6e510f7..fdbbc737bd56 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -532,8 +532,6 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, { unsigned int idx; - target_freq = clamp_val(target_freq, policy->min, policy->max); - if (!policy->freq_table) return target_freq; @@ -557,7 +555,22 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE); + unsigned int min = READ_ONCE(policy->min); + unsigned int max = READ_ONCE(policy->max); + + /* + * If this function runs in parallel with cpufreq_set_policy(), it may + * read policy->min before the update and policy->max after the update + * or the other way around, so there is no ordering guarantee. + * + * Resolve this by always honoring the max (in case it comes from + * thermal throttling or similar). + */ + if (unlikely(min > max)) + min = max; + + return __resolve_freq(policy, clamp_val(target_freq, min, max), + CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2283,6 +2296,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; + target_freq = clamp_val(target_freq, policy->min, policy->max); target_freq = __resolve_freq(policy, target_freq, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", @@ -2573,11 +2587,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * Resolve policy min/max to available frequencies. It ensures * no frequency resolution will neither overshoot the requested maximum * nor undershoot the requested minimum. + * + * Avoid storing intermediate values in policy->max or policy->min and + * compiler optimizations around them because they may be accessed + * concurrently by cpufreq_driver_resolve_freq() during the update. */ - policy->min = new_data.min; - policy->max = new_data.max; - policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L); - policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); + trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; From 15888cd413f8ffdba42396a764c33eff4ef2ec3c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Apr 2025 13:36:21 +0200 Subject: [PATCH 21/97] cpufreq: Fix setting policy limits when frequency tables are used commit b79028039f440e7d2c4df6ab243060c4e3803e84 upstream. Commit 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") overlooked the fact that policy->min and policy->max were accessed directly in cpufreq_frequency_table_target() and in the functions called by it. Consequently, the changes made by that commit led to problems with setting policy limits. Address this by passing the target frequency limits to __resolve_freq() and cpufreq_frequency_table_target() and propagating them to the functions called by the latter. Fixes: 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") Cc: 5.16+ # 5.16+ Closes: https://lore.kernel.org/linux-pm/aAplED3IA_J0eZN0@linaro.org/ Reported-by: Stephan Gerhold Signed-off-by: Rafael J. Wysocki Tested-by: Stephan Gerhold Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/5896780.DvuYhMxLoT@rjwysocki.net Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 22 +++++--- drivers/cpufreq/cpufreq_ondemand.c | 3 +- drivers/cpufreq/freq_table.c | 6 +-- include/linux/cpufreq.h | 83 +++++++++++++++++++----------- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fdbbc737bd56..3f35ce19c7b6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -528,14 +528,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; + target_freq = clamp_val(target_freq, min, max); + if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -569,8 +573,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, if (unlikely(min > max)) min = max; - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2296,8 +2299,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2592,8 +2595,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * compiler optimizations around them because they may be accessed * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); trace_cpu_frequency_limits(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c52d19d67557..65cbd5ecbaf8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -77,7 +77,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 67e56cf638ef..190f8aa7ad02 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -116,8 +116,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -148,7 +148,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3759d0a15c7b..d87a0d024693 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -789,8 +789,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -855,12 +855,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -870,6 +870,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -923,12 +931,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -938,6 +946,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -1008,12 +1024,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1023,7 +1039,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1032,11 +1058,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1047,29 +1075,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } From 441021e5b3c7d9bd1b963590652c415929f3b157 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 22 Apr 2025 20:30:25 +0900 Subject: [PATCH 22/97] tracing: Fix oob write in trace_seq_to_buffer() commit f5178c41bb43444a6008150fe6094497135d07cb upstream. syzbot reported this bug: ================================================================== BUG: KASAN: slab-out-of-bounds in trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] BUG: KASAN: slab-out-of-bounds in tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 Write of size 4507 at addr ffff888032b6b000 by task syz.2.320/7260 CPU: 1 UID: 0 PID: 7260 Comm: syz.2.320 Not tainted 6.15.0-rc1-syzkaller-00301-g3bde70a2c827 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 __asan_memcpy+0x3c/0x60 mm/kasan/shadow.c:106 trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 .... ================================================================== It has been reported that trace_seq_to_buffer() tries to copy more data than PAGE_SIZE to buf. Therefore, to prevent this, we should use the smaller of trace_seq_used(&iter->seq) and PAGE_SIZE as an argument. Link: https://lore.kernel.org/20250422113026.13308-1-aha310510@gmail.com Reported-by: syzbot+c8cd2d2c412b868263fb@syzkaller.appspotmail.com Fixes: 3c56819b14b0 ("tracing: splice support for tracing_pipe") Suggested-by: Steven Rostedt Signed-off-by: Jeongjun Park Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index acc176aa1cbe..9da29583dfbc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7054,13 +7054,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } From ddbfb6adb0596e5b7ecbd6c5dfc5d79c22397e25 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Apr 2025 14:26:48 -0700 Subject: [PATCH 23/97] xfs: fix error returns from xfs_bmapi_write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6773da870ab89123d1b513da63ed59e32a29cb77 ] xfs_bmapi_write can return 0 without actually returning a mapping in mval in two different cases: 1) when there is absolutely no space available to do an allocation 2) when converting delalloc space, and the allocation is so small that it only covers parts of the delalloc extent before the range requested by the caller Callers at best can handle one of these cases, but in many cases can't cope with either one. Switch xfs_bmapi_write to always return a mapping or return an error code instead. For case 1) above ENOSPC is the obvious choice which is very much what the callers expect anyway. For case 2) there is no really good error code, so pick a funky one from the SysV streams portfolio. This fixes the reproducer here: https://lore.kernel.org/linux-xfs/CAEJPjCvT3Uag-pMTYuigEjWZHn1sGMZ0GCjVVCv29tNHK76Cgg@mail.gmail.com0/ which uses reserved blocks to create file systems that are gravely out of space and thus cause at least xfs_file_alloc_space to hang and trigger the lack of ENOSPC handling in xfs_dquot_disk_alloc. Note that this patch does not actually make any caller but xfs_alloc_file_space deal intelligently with case 2) above. Signed-off-by: Christoph Hellwig Reported-by: 刘通 Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr_remote.c | 1 - fs/xfs/libxfs/xfs_bmap.c | 46 ++++++++++++++++++++++++++------- fs/xfs/libxfs/xfs_da_btree.c | 20 ++++---------- fs/xfs/xfs_bmap_util.c | 35 ++++++++++++------------- fs/xfs/xfs_dquot.c | 1 - fs/xfs/xfs_iomap.c | 8 ------ fs/xfs/xfs_reflink.c | 14 ---------- fs/xfs/xfs_rtalloc.c | 2 -- 8 files changed, 59 insertions(+), 68 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d440393b40eb..54de405cbab5 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -619,7 +619,6 @@ xfs_attr_rmtval_set_blk( if (error) return error; - ASSERT(nmap == 1); ASSERT((map->br_startblock != DELAYSTARTBLOCK) && (map->br_startblock != HOLESTARTBLOCK)); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2a3b78032cb0..0235c1dd3d7e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4113,8 +4113,10 @@ xfs_bmapi_allocate( } else { error = xfs_bmap_alloc_userdata(bma); } - if (error || bma->blkno == NULLFSBLOCK) + if (error) return error; + if (bma->blkno == NULLFSBLOCK) + return -ENOSPC; if (bma->flags & XFS_BMAPI_ZERO) { error = xfs_zero_extent(bma->ip, bma->blkno, bma->length); @@ -4294,6 +4296,15 @@ xfs_bmapi_finish( * extent state if necessary. Details behaviour is controlled by the flags * parameter. Only allocates blocks from a single allocation group, to avoid * locking problems. + * + * Returns 0 on success and places the extent mappings in mval. nmaps is used + * as an input/output parameter where the caller specifies the maximum number + * of mappings that may be returned and xfs_bmapi_write passes back the number + * of mappings (including existing mappings) it found. + * + * Returns a negative error code on failure, including -ENOSPC when it could not + * allocate any blocks and -ENOSR when it did allocate blocks to convert a + * delalloc range, but those blocks were before the passed in range. */ int xfs_bmapi_write( @@ -4421,10 +4432,16 @@ xfs_bmapi_write( ASSERT(len > 0); ASSERT(bma.length > 0); error = xfs_bmapi_allocate(&bma); - if (error) + if (error) { + /* + * If we already allocated space in a previous + * iteration return what we go so far when + * running out of space. + */ + if (error == -ENOSPC && bma.nallocs) + break; goto error0; - if (bma.blkno == NULLFSBLOCK) - break; + } /* * If this is a CoW allocation, record the data in @@ -4462,7 +4479,6 @@ xfs_bmapi_write( if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got)) eof = true; } - *nmap = n; error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, whichfork); @@ -4473,7 +4489,22 @@ xfs_bmapi_write( ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork)); xfs_bmapi_finish(&bma, whichfork, 0); xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, - orig_nmap, *nmap); + orig_nmap, n); + + /* + * When converting delayed allocations, xfs_bmapi_allocate ignores + * the passed in bno and always converts from the start of the found + * delalloc extent. + * + * To avoid a successful return with *nmap set to 0, return the magic + * -ENOSR error code for this particular case so that the caller can + * handle it. + */ + if (!n) { + ASSERT(bma.nallocs >= *nmap); + return -ENOSR; + } + *nmap = n; return 0; error0: xfs_bmapi_finish(&bma, whichfork, error); @@ -4580,9 +4611,6 @@ xfs_bmapi_convert_delalloc( if (error) goto out_finish; - error = -ENOSPC; - if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) - goto out_finish; error = -EFSCORRUPTED; if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) goto out_finish; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 282c7cf032f4..12e3cca804b7 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2158,8 +2158,8 @@ xfs_da_grow_inode_int( struct xfs_inode *dp = args->dp; int w = args->whichfork; xfs_rfsblock_t nblks = dp->i_nblocks; - struct xfs_bmbt_irec map, *mapp; - int nmap, error, got, i, mapi; + struct xfs_bmbt_irec map, *mapp = ↦ + int nmap, error, got, i, mapi = 1; /* * Find a spot in the file space to put the new block. @@ -2175,14 +2175,7 @@ xfs_da_grow_inode_int( error = xfs_bmapi_write(tp, dp, *bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, args->total, &map, &nmap); - if (error) - return error; - - ASSERT(nmap <= 1); - if (nmap == 1) { - mapp = ↦ - mapi = 1; - } else if (nmap == 0 && count > 1) { + if (error == -ENOSPC && count > 1) { xfs_fileoff_t b; int c; @@ -2199,16 +2192,13 @@ xfs_da_grow_inode_int( args->total, &mapp[mapi], &nmap); if (error) goto out_free_map; - if (nmap < 1) - break; mapi += nmap; b = mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount; } - } else { - mapi = 0; - mapp = NULL; } + if (error) + goto out_free_map; /* * Count the blocks we got, make sure it matches the total. diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 468bb61a5e46..399451aab26a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -868,33 +868,32 @@ xfs_alloc_file_space( if (error) goto error; + /* + * If the allocator cannot find a single free extent large + * enough to cover the start block of the requested range, + * xfs_bmapi_write will return -ENOSR. + * + * In that case we simply need to keep looping with the same + * startoffset_fsb so that one of the following allocations + * will eventually reach the requested range. + */ error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, &nimaps); - if (error) - goto error; + if (error) { + if (error != -ENOSR) + goto error; + error = 0; + } else { + startoffset_fsb += imapp->br_blockcount; + allocatesize_fsb -= imapp->br_blockcount; + } ip->i_diflags |= XFS_DIFLAG_PREALLOC; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - break; - - /* - * If the allocator cannot find a single free extent large - * enough to cover the start block of the requested range, - * xfs_bmapi_write will return 0 but leave *nimaps set to 0. - * - * In that case we simply need to keep looping with the same - * startoffset_fsb so that one of the following allocations - * will eventually reach the requested range. - */ - if (nimaps) { - startoffset_fsb += imapp->br_blockcount; - allocatesize_fsb -= imapp->br_blockcount; - } } return error; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index a8b2f3b278ea..6186b69be50a 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -333,7 +333,6 @@ xfs_dquot_disk_alloc( goto err_cancel; ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); - ASSERT(nmaps == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ab5512c0bcf7..60d9638cec6d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -309,14 +309,6 @@ xfs_iomap_write_direct( if (error) goto out_unlock; - /* - * Copy any maps to caller's array and return any error. - */ - if (nimaps == 0) { - error = -ENOSPC; - goto out_unlock; - } - if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock))) error = xfs_alert_fsblock_zero(ip, imap); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1bac6a8af970..0405226fb74c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -431,13 +431,6 @@ xfs_reflink_fill_cow_hole( if (error) return error; - /* - * Allocation succeeded but the requested range was not even partially - * satisfied? Bail out! - */ - if (nimaps == 0) - return -ENOSPC; - convert: return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now); @@ -500,13 +493,6 @@ xfs_reflink_fill_delalloc( error = xfs_trans_commit(tp); if (error) return error; - - /* - * Allocation succeeded but the requested range was not even - * partially satisfied? Bail out! - */ - if (nimaps == 0) - return -ENOSPC; } while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff); return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 7c5134899634..5cf1e91f4c20 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -840,8 +840,6 @@ xfs_growfs_rt_alloc( nmap = 1; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, XFS_BMAPI_METADATA, 0, &map, &nmap); - if (!error && nmap < 1) - error = -ENOSPC; if (error) goto out_trans_cancel; /* From 1e2a60807f6957df60fa43b062cc17338d7ed571 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Apr 2025 14:26:49 -0700 Subject: [PATCH 24/97] xfs: fix xfs_bmap_add_extent_delay_real for partial conversions [ Upstream commit d69bee6a35d3c5e4873b9e164dd1a9711351a97c ] xfs_bmap_add_extent_delay_real takes parts or all of a delalloc extent and converts them to a real extent. It is written to deal with any potential overlap of the to be converted range with the delalloc extent, but it turns out that currently only converting the entire extents, or a part starting at the beginning is actually exercised, as the only caller always tries to convert the entire delalloc extent, and either succeeds or at least progresses partially from the start. If it only converts a tiny part of a delalloc extent, the indirect block calculation for the new delalloc extent (da_new) might be equivalent to that of the existing delalloc extent (da_old). If this extent conversion now requires allocating an indirect block that gets accounted into da_new, leading to the assert that da_new must be smaller or equal to da_new unless we split the extent to trigger. Except for the assert that case is actually handled by just trying to allocate more space, as that already handled for the split case (which currently can't be reached at all), so just reusing it should be fine. Except that without dipping into the reserved block pool that would make it a bit too easy to trigger a fs shutdown due to ENOSPC. So in addition to adjusting the assert, also dip into the reserved block pool. Note that I could only reproduce the assert with a change to only convert the actually asked range instead of the full delalloc extent from xfs_bmapi_write. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0235c1dd3d7e..f8a355e1196f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1530,6 +1530,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: @@ -1559,6 +1560,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -1592,6 +1594,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: @@ -1624,6 +1627,7 @@ xfs_bmap_add_extent_delay_real( goto done; } } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: @@ -1661,6 +1665,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING: @@ -1748,6 +1753,7 @@ xfs_bmap_add_extent_delay_real( xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); xfs_iext_next(ifp, &bma->icur); xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT); + ASSERT(da_new <= da_old); break; case BMAP_RIGHT_FILLING: @@ -1795,6 +1801,7 @@ xfs_bmap_add_extent_delay_real( PREV.br_blockcount = temp; xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); xfs_iext_next(ifp, &bma->icur); + ASSERT(da_new <= da_old); break; case 0: @@ -1915,11 +1922,9 @@ xfs_bmap_add_extent_delay_real( } /* adjust for changes in reserved delayed indirect blocks */ - if (da_new != da_old) { - ASSERT(state == 0 || da_new < da_old); + if (da_new != da_old) error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), - false); - } + true); xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); done: From 2edfd2c87c56a231f1c617703511c47065a3f39b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Apr 2025 14:26:50 -0700 Subject: [PATCH 25/97] xfs: remove a racy if_bytes check in xfs_reflink_end_cow_extent [ Upstream commit 86de848403abda05bf9c16dcdb6bef65a8d88c41 ] Accessing if_bytes without the ilock is racy. Remove the initial if_bytes == 0 check in xfs_reflink_end_cow_extent and let ext_iext_lookup_extent fail for this case after we've taken the ilock. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_reflink.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 0405226fb74c..d539487eaf1a 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -718,12 +718,6 @@ xfs_reflink_end_cow_extent( int nmaps; int error; - /* No COW extents? That's easy! */ - if (ifp->if_bytes == 0) { - *offset_fsb = end_fsb; - return 0; - } - resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, XFS_TRANS_RESERVE, &tp); From 3a97d267fd91214d4a0da953b129c877587967ef Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:26:51 -0700 Subject: [PATCH 26/97] xfs: require XFS_SB_FEAT_INCOMPAT_LOG_XATTRS for attr log intent item recovery [ Upstream commit 8ef1d96a985e4dc07ffbd71bd7fc5604a80cc644 ] The XFS_SB_FEAT_INCOMPAT_LOG_XATTRS feature bit protects a filesystem from old kernels that do not know how to recover extended attribute log intent items. Make this check mandatory instead of a debugging assert. Fixes: fd920008784ea ("xfs: Set up infrastructure for log attribute replay") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr_item.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 11e88a76a33c..c5bc6b72e014 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -510,6 +510,9 @@ xfs_attri_validate( unsigned int op = attrp->alfi_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK; + if (!xfs_sb_version_haslogxattrs(&mp->m_sb)) + return false; + if (attrp->__pad != 0) return false; @@ -601,8 +604,6 @@ xfs_attri_item_recover( args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; - ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb)); - switch (attr->xattri_op_flags) { case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: From d6f7f0ddc4c419136d6c27220ae44c45d33d666b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:26:52 -0700 Subject: [PATCH 27/97] xfs: check opcode and iovec count match in xlog_recover_attri_commit_pass2 [ Upstream commit ad206ae50eca62836c5460ab5bbf2a6c59a268e7 ] Check that the number of recovered log iovecs is what is expected for the xattri opcode is expecting. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr_item.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index c5bc6b72e014..a8e09ea2622d 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -717,6 +717,7 @@ xlog_recover_attri_commit_pass2( const void *attr_value = NULL; const void *attr_name; size_t len; + unsigned int op; attri_formatp = item->ri_buf[0].i_addr; attr_name = item->ri_buf[1].i_addr; @@ -735,6 +736,32 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } + /* Check the number of log iovecs makes sense for the op code. */ + op = attri_formatp->alfi_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK; + switch (op) { + case XFS_ATTRI_OP_FLAGS_SET: + case XFS_ATTRI_OP_FLAGS_REPLACE: + /* Log item, attr name, attr value */ + if (item->ri_total != 3) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + break; + case XFS_ATTRI_OP_FLAGS_REMOVE: + /* Log item, attr name */ + if (item->ri_total != 2) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + break; + default: + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + /* Validate the attr name */ if (item->ri_buf[1].i_len != xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { From b359d2ee93b1236cf724b05b3a8d1b485aa93954 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:26:53 -0700 Subject: [PATCH 28/97] xfs: validate recovered name buffers when recovering xattr items [ Upstream commit 1c7f09d210aba2f2bb206e2e8c97c9f11a3fd880 ] Strengthen the xattri log item recovery code by checking that we actually have the required name and newname buffers for whatever operation we're replaying. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr_item.c | 58 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index a8e09ea2622d..4a712f1565c1 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -717,22 +717,20 @@ xlog_recover_attri_commit_pass2( const void *attr_value = NULL; const void *attr_name; size_t len; - unsigned int op; - - attri_formatp = item->ri_buf[0].i_addr; - attr_name = item->ri_buf[1].i_addr; + unsigned int op, i = 0; /* Validate xfs_attri_log_format before the large memory allocation */ len = sizeof(struct xfs_attri_log_format); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[i].i_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } + attri_formatp = item->ri_buf[i].i_addr; if (!xfs_attri_validate(mp, attri_formatp)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + attri_formatp, len); return -EFSCORRUPTED; } @@ -761,31 +759,69 @@ xlog_recover_attri_commit_pass2( attri_formatp, len); return -EFSCORRUPTED; } + i++; /* Validate the attr name */ - if (item->ri_buf[1].i_len != + if (item->ri_buf[i].i_len != xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + attri_formatp, len); return -EFSCORRUPTED; } + attr_name = item->ri_buf[i].i_addr; if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[1].i_addr, item->ri_buf[1].i_len); + attri_formatp, len); return -EFSCORRUPTED; } + i++; /* Validate the attr value, if present */ if (attri_formatp->alfi_value_len != 0) { - if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { + if (item->ri_buf[i].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } - attr_value = item->ri_buf[2].i_addr; + attr_value = item->ri_buf[i].i_addr; + i++; + } + + /* + * Make sure we got the correct number of buffers for the operation + * that we just loaded. + */ + if (i != item->ri_total) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + + switch (op) { + case XFS_ATTRI_OP_FLAGS_REMOVE: + /* Regular remove operations operate only on names. */ + if (attr_value != NULL || attri_formatp->alfi_value_len != 0) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + fallthrough; + case XFS_ATTRI_OP_FLAGS_SET: + case XFS_ATTRI_OP_FLAGS_REPLACE: + /* + * Regular xattr set/remove/replace operations require a name + * and do not take a newname. Values are optional for set and + * replace. + */ + if (attr_name == NULL || attri_formatp->alfi_name_len == 0) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + break; } /* From 7bfa86d2042661d77b31271465f1a8fd634341be Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:26:54 -0700 Subject: [PATCH 29/97] xfs: revert commit 44af6c7e59b12 [ Upstream commit 2a009397eb5ae178670cbd7101e9635cf6412b35 ] In my haste to fix what I thought was a performance problem in the attr scrub code, I neglected to notice that the xfs_attr_get_ilocked also had the effect of checking that attributes can actually be looked up through the attr dabtree. Fix this. Fixes: 44af6c7e59b12 ("xfs: don't load local xattr values during scrub") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/scrub/attr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index b6f0c9f3f124..f51771e5c3fe 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -159,6 +159,11 @@ xchk_xattr_listent( args.value = xchk_xattr_valuebuf(sx->sc); args.valuelen = valuelen; + /* + * Get the attr value to ensure that lookup can find this attribute + * through the dabtree indexing and that remote value retrieval also + * works correctly. + */ error = xfs_attr_get_ilocked(&args); /* ENODATA means the hash lookup failed and the attr is bad */ if (error == -ENODATA) From 0e2fcf273b67213215c91cbc17583f12f22d4bce Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 30 Apr 2025 14:26:55 -0700 Subject: [PATCH 30/97] xfs: match lock mode in xfs_buffered_write_iomap_begin() [ Upstream commit bb712842a85d595525e72f0e378c143e620b3ea2 ] Commit 1aa91d9c9933 ("xfs: Add async buffered write support") replace xfs_ilock(XFS_ILOCK_EXCL) with xfs_ilock_for_iomap() when locking the writing inode, and a new variable lockmode is used to indicate the lock mode. Although the lockmode should always be XFS_ILOCK_EXCL, it's still better to use this variable instead of useing XFS_ILOCK_EXCL directly when unlocking the inode. Fixes: 1aa91d9c9933 ("xfs: Add async buffered write support") Signed-off-by: Zhang Yi Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 60d9638cec6d..f6ca27a42498 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1129,13 +1129,13 @@ retry: * them out if the write happens to fail. */ seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); found_imap: seq = xfs_iomap_inode_sequence(ip, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); found_cow: @@ -1145,17 +1145,17 @@ found_cow: if (error) goto out_unlock; seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); } xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; } From b4dbf9056481a506a4c72f9f05929b8648488176 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 30 Apr 2025 14:26:56 -0700 Subject: [PATCH 31/97] xfs: make the seq argument to xfs_bmapi_convert_delalloc() optional [ Upstream commit fc8d0ba0ff5fe4700fa02008b7751ec6b84b7677 ] Allow callers to pass a NULLL seq argument if they don't care about the fork sequence number. Signed-off-by: Zhang Yi Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f8a355e1196f..92d321dd8944 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4580,7 +4580,8 @@ xfs_bmapi_convert_delalloc( if (!isnullstartblock(bma.got.br_startblock)) { xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); - *seq = READ_ONCE(ifp->if_seq); + if (seq) + *seq = READ_ONCE(ifp->if_seq); goto out_trans_cancel; } @@ -4626,7 +4627,8 @@ xfs_bmapi_convert_delalloc( ASSERT(!isnullstartblock(bma.got.br_startblock)); xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); - *seq = READ_ONCE(ifp->if_seq); + if (seq) + *seq = READ_ONCE(ifp->if_seq); if (whichfork == XFS_COW_FORK) xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length); From 9ae4afcb9f9cd018efcea77e5f2f1603e3593a23 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 30 Apr 2025 14:26:57 -0700 Subject: [PATCH 32/97] xfs: make xfs_bmapi_convert_delalloc() to allocate the target offset [ Upstream commit 2e08371a83f1c06fd85eea8cd37c87a224cc4cc4 ] Since xfs_bmapi_convert_delalloc() only attempts to allocate the entire delalloc extent and require multiple invocations to allocate the target offset. So xfs_convert_blocks() add a loop to do this job and we call it in the write back path, but xfs_convert_blocks() isn't a common helper. Let's do it in xfs_bmapi_convert_delalloc() and drop xfs_convert_blocks(), preparing for the post EOF delalloc blocks converting in the buffered write begin path. Signed-off-by: Zhang Yi Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 34 +++++++++++++++++++++++-- fs/xfs/xfs_aops.c | 54 +++++++++++----------------------------- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 92d321dd8944..a3c4d4a442af 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4522,8 +4522,8 @@ error0: * invocations to allocate the target offset if a large enough physical extent * is not available. */ -int -xfs_bmapi_convert_delalloc( +static int +xfs_bmapi_convert_one_delalloc( struct xfs_inode *ip, int whichfork, xfs_off_t offset, @@ -4651,6 +4651,36 @@ out_trans_cancel: return error; } +/* + * Pass in a dellalloc extent and convert it to real extents, return the real + * extent that maps offset_fsb in iomap. + */ +int +xfs_bmapi_convert_delalloc( + struct xfs_inode *ip, + int whichfork, + loff_t offset, + struct iomap *iomap, + unsigned int *seq) +{ + int error; + + /* + * Attempt to allocate whatever delalloc extent currently backs offset + * and put the result into iomap. Allocate in a loop because it may + * take several attempts to allocate real blocks for a contiguous + * delalloc extent if free space is sufficiently fragmented. + */ + do { + error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset, + iomap, seq); + if (error) + return error; + } while (iomap->offset + iomap->length <= offset); + + return 0; +} + int xfs_bmapi_remap( struct xfs_trans *tp, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 21c241e96d48..50a7f2745514 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -225,45 +225,6 @@ xfs_imap_valid( return true; } -/* - * Pass in a dellalloc extent and convert it to real extents, return the real - * extent that maps offset_fsb in wpc->iomap. - * - * The current page is held locked so nothing could have removed the block - * backing offset_fsb, although it could have moved from the COW to the data - * fork by another thread. - */ -static int -xfs_convert_blocks( - struct iomap_writepage_ctx *wpc, - struct xfs_inode *ip, - int whichfork, - loff_t offset) -{ - int error; - unsigned *seq; - - if (whichfork == XFS_COW_FORK) - seq = &XFS_WPC(wpc)->cow_seq; - else - seq = &XFS_WPC(wpc)->data_seq; - - /* - * Attempt to allocate whatever delalloc extent currently backs offset - * and put the result into wpc->iomap. Allocate in a loop because it - * may take several attempts to allocate real blocks for a contiguous - * delalloc extent if free space is sufficiently fragmented. - */ - do { - error = xfs_bmapi_convert_delalloc(ip, whichfork, offset, - &wpc->iomap, seq); - if (error) - return error; - } while (wpc->iomap.offset + wpc->iomap.length <= offset); - - return 0; -} - static int xfs_map_blocks( struct iomap_writepage_ctx *wpc, @@ -281,6 +242,7 @@ xfs_map_blocks( struct xfs_iext_cursor icur; int retries = 0; int error = 0; + unsigned int *seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -376,7 +338,19 @@ retry: trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: - error = xfs_convert_blocks(wpc, ip, whichfork, offset); + /* + * Convert a dellalloc extent to a real one. The current page is held + * locked so nothing could have removed the block backing offset_fsb, + * although it could have moved from the COW to the data fork by another + * thread. + */ + if (whichfork == XFS_COW_FORK) + seq = &XFS_WPC(wpc)->cow_seq; + else + seq = &XFS_WPC(wpc)->data_seq; + + error = xfs_bmapi_convert_delalloc(ip, whichfork, offset, + &wpc->iomap, seq); if (error) { /* * If we failed to find the extent in the COW fork we might have From 251af3b8c14c12fea57c11500750a2da27501dcf Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 30 Apr 2025 14:26:58 -0700 Subject: [PATCH 33/97] xfs: convert delayed extents to unwritten when zeroing post eof blocks [ Upstream commit 5ce5674187c345dc31534d2024c09ad8ef29b7ba ] Current clone operation could be non-atomic if the destination of a file is beyond EOF, user could get a file with corrupted (zeroed) data on crash. The problem is about preallocations. If you write some data into a file: [A...B) and XFS decides to preallocate some post-eof blocks, then it can create a delayed allocation reservation: [A.........D) The writeback path tries to convert delayed extents to real ones by allocating blocks. If there aren't enough contiguous free space, we can end up with two extents, the first real and the second still delalloc: [A....C)[C.D) After that, both the in-memory and the on-disk file sizes are still B. If we clone into the range [E...F) from another file: [A....C)[C.D) [E...F) then xfs_reflink_zero_posteof() calls iomap_zero_range() to zero out the range [B, E) beyond EOF and flush it. Since [C, D) is still a delalloc extent, its pagecache will be zeroed and both the in-memory and on-disk size will be updated to D after flushing but before cloning. This is wrong, because the user can see the size change and read the zeroes while the clone operation is ongoing. We need to keep the in-memory and on-disk size before the clone operation starts, so instead of writing zeroes through the page cache for delayed ranges beyond EOF, we convert these ranges to unwritten and invalidate any cached data over that range beyond EOF. Suggested-by: Dave Chinner Signed-off-by: Zhang Yi Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f6ca27a42498..fab191a09442 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -996,6 +996,24 @@ xfs_buffered_write_iomap_begin( goto out_unlock; } + /* + * For zeroing, trim a delalloc extent that extends beyond the EOF + * block. If it starts beyond the EOF block, convert it to an + * unwritten extent. + */ + if ((flags & IOMAP_ZERO) && imap.br_startoff <= offset_fsb && + isnullstartblock(imap.br_startblock)) { + xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); + + if (offset_fsb >= eof_fsb) + goto convert_delay; + if (end_fsb > eof_fsb) { + end_fsb = eof_fsb; + xfs_trim_extent(&imap, offset_fsb, + end_fsb - offset_fsb); + } + } + /* * Search the COW fork extent list even if we did not find a data fork * extent. This serves two purposes: first this implements the @@ -1138,6 +1156,17 @@ found_imap: xfs_iunlock(ip, lockmode); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); +convert_delay: + xfs_iunlock(ip, lockmode); + truncate_pagecache(inode, offset); + error = xfs_bmapi_convert_delalloc(ip, XFS_DATA_FORK, offset, + iomap, NULL); + if (error) + return error; + + trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &imap); + return 0; + found_cow: seq = xfs_iomap_inode_sequence(ip, 0); if (imap.br_startoff <= offset_fsb) { From 80d3d1e7a231da50bd5da643c9e6254479246efb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:26:59 -0700 Subject: [PATCH 34/97] xfs: allow symlinks with short remote targets [ Upstream commit 38de567906d95c397d87f292b892686b7ec6fbc3 ] An internal user complained about log recovery failing on a symlink ("Bad dinode after recovery") with the following (excerpted) format: core.magic = 0x494e core.mode = 0120777 core.version = 3 core.format = 2 (extents) core.nlinkv2 = 1 core.nextents = 1 core.size = 297 core.nblocks = 1 core.naextents = 0 core.forkoff = 0 core.aformat = 2 (extents) u3.bmx[0] = [startoff,startblock,blockcount,extentflag] 0:[0,12,1,0] This is a symbolic link with a 297-byte target stored in a disk block, which is to say this is a symlink with a remote target. The forkoff is 0, which is to say that there's 512 - 176 == 336 bytes in the inode core to store the data fork. Eventually, testing of generic/388 failed with the same inode corruption message during inode recovery. In writing a debugging patch to call xfs_dinode_verify on dirty inode log items when we're committing transactions, I observed that xfs/298 can reproduce the problem quite quickly. xfs/298 creates a symbolic link, adds some extended attributes, then deletes them all. The test failure occurs when the final removexattr also deletes the attr fork because that does not convert the remote symlink back into a shortform symlink. That is how we trip this test. The only reason why xfs/298 only triggers with the debug patch added is that it deletes the symlink, so the final iflush shows the inode as free. I wrote a quick fstest to emulate the behavior of xfs/298, except that it leaves the symlinks on the filesystem after inducing the "corrupt" state. Kernels going back at least as far as 4.18 have written out symlink inodes in this manner and prior to 1eb70f54c445f they did not object to reading them back in. Because we've been writing out inodes this way for quite some time, the only way to fix this is to relax the check for symbolic links. Directories don't have this problem because di_size is bumped to blocksize during the sf->data conversion. Fixes: 1eb70f54c445f ("xfs: validate inode fork size against fork format") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_inode_buf.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 601b05ca5fc2..127c9a698d9f 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -365,17 +365,37 @@ xfs_dinode_verify_fork( /* * For fork types that can contain local data, check that the fork * format matches the size of local data contained within the fork. - * - * For all types, check that when the size says the should be in extent - * or btree format, the inode isn't claiming it is in local format. */ if (whichfork == XFS_DATA_FORK) { - if (S_ISDIR(mode) || S_ISLNK(mode)) { + /* + * A directory small enough to fit in the inode must be stored + * in local format. The directory sf <-> extents conversion + * code updates the directory size accordingly. + */ + if (S_ISDIR(mode)) { if (be64_to_cpu(dip->di_size) <= fork_size && fork_format != XFS_DINODE_FMT_LOCAL) return __this_address; } + /* + * A symlink with a target small enough to fit in the inode can + * be stored in extents format if xattrs were added (thus + * converting the data fork from shortform to remote format) + * and then removed. + */ + if (S_ISLNK(mode)) { + if (be64_to_cpu(dip->di_size) <= fork_size && + fork_format != XFS_DINODE_FMT_EXTENTS && + fork_format != XFS_DINODE_FMT_LOCAL) + return __this_address; + } + + /* + * For all types, check that when the size says the fork should + * be in extent or btree format, the inode isn't claiming to be + * in local format. + */ if (be64_to_cpu(dip->di_size) > fork_size && fork_format == XFS_DINODE_FMT_LOCAL) return __this_address; From 9e1ad0875ed82af6065d8591804e7150be37e4ba Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Wed, 30 Apr 2025 14:27:00 -0700 Subject: [PATCH 35/97] xfs: make sure sb_fdblocks is non-negative [ Upstream commit 58f880711f2ba53fd5e959875aff5b3bf6d5c32e ] A user with a completely full filesystem experienced an unexpected shutdown when the filesystem tried to write the superblock during runtime. kernel shows the following dmesg: [ 8.176281] XFS (dm-4): Metadata corruption detected at xfs_sb_write_verify+0x60/0x120 [xfs], xfs_sb block 0x0 [ 8.177417] XFS (dm-4): Unmount and run xfs_repair [ 8.178016] XFS (dm-4): First 128 bytes of corrupted metadata buffer: [ 8.178703] 00000000: 58 46 53 42 00 00 10 00 00 00 00 00 01 90 00 00 XFSB............ [ 8.179487] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 8.180312] 00000020: cf 12 dc 89 ca 26 45 29 92 e6 e3 8d 3b b8 a2 c3 .....&E)....;... [ 8.181150] 00000030: 00 00 00 00 01 00 00 06 00 00 00 00 00 00 00 80 ................ [ 8.182003] 00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................ [ 8.182004] 00000050: 00 00 00 01 00 64 00 00 00 00 00 04 00 00 00 00 .....d.......... [ 8.182004] 00000060: 00 00 64 00 b4 a5 02 00 02 00 00 08 00 00 00 00 ..d............. [ 8.182005] 00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 17 00 00 19 ................ [ 8.182008] XFS (dm-4): Corruption of in-memory data detected. Shutting down filesystem [ 8.182010] XFS (dm-4): Please unmount the filesystem and rectify the problem(s) When xfs_log_sb writes super block to disk, b_fdblocks is fetched from m_fdblocks without any lock. As m_fdblocks can experience a positive -> negative -> positive changing when the FS reaches fullness (see xfs_mod_fdblocks). So there is a chance that sb_fdblocks is negative, and because sb_fdblocks is type of unsigned long long, it reads super big. And sb_fdblocks being bigger than sb_dblocks is a problem during log recovery, xfs_validate_sb_write() complains. Fix: As sb_fdblocks will be re-calculated during mount when lazysbcount is enabled, We just need to make xfs_validate_sb_write() happy -- make sure sb_fdblocks is not nenative. This patch also takes care of other percpu counters in xfs_log_sb. Signed-off-by: Wengang Wang Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_sb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 90ed55cd3d10..8e0a176b8e0b 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1022,11 +1022,12 @@ xfs_log_sb( * and hence we don't need have to update it here. */ if (xfs_has_lazysbcount(mp)) { - mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); + mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount); mp->m_sb.sb_ifree = min_t(uint64_t, - percpu_counter_sum(&mp->m_ifree), + percpu_counter_sum_positive(&mp->m_ifree), mp->m_sb.sb_icount); - mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + mp->m_sb.sb_fdblocks = + percpu_counter_sum_positive(&mp->m_fdblocks); } xfs_sb_to_disk(bp->b_addr, &mp->m_sb); From df403c882feb4e074d3a758c93a5d2a2c314ad3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Apr 2025 14:27:01 -0700 Subject: [PATCH 36/97] xfs: fix freeing speculative preallocations for preallocated files [ Upstream commit 610b29161b0aa9feb59b78dc867553274f17fb01 ] xfs_can_free_eofblocks returns false for files that have persistent preallocations unless the force flag is passed and there are delayed blocks. This means it won't free delalloc reservations for files with persistent preallocations unless the force flag is set, and it will also free the persistent preallocations if the force flag is set and the file happens to have delayed allocations. Both of these are bad, so do away with the force flag and always free only post-EOF delayed allocations for files with the XFS_DIFLAG_PREALLOC or APPEND flags set. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 30 ++++++++++++++++++++++-------- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_inode.c | 14 ++++---------- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 399451aab26a..62b92e92a685 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -636,13 +636,11 @@ out_unlock: /* * Test whether it is appropriate to check an inode for and free post EOF - * blocks. The 'force' parameter determines whether we should also consider - * regular files that are marked preallocated or append-only. + * blocks. */ bool xfs_can_free_eofblocks( - struct xfs_inode *ip, - bool force) + struct xfs_inode *ip) { struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; @@ -676,11 +674,11 @@ xfs_can_free_eofblocks( return false; /* - * Do not free real preallocated or append-only files unless the file - * has delalloc blocks and we are forced to remove them. + * Only free real extents for inodes with persistent preallocations or + * the append-only flag. */ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) - if (!force || ip->i_delayed_blks == 0) + if (ip->i_delayed_blks == 0) return false; /* @@ -734,6 +732,22 @@ xfs_free_eofblocks( /* Wait on dio to ensure i_size has settled. */ inode_dio_wait(VFS_I(ip)); + /* + * For preallocated files only free delayed allocations. + * + * Note that this means we also leave speculative preallocations in + * place for preallocated files. + */ + if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) { + if (ip->i_delayed_blks) { + xfs_bmap_punch_delalloc_range(ip, + round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize), + LLONG_MAX); + } + xfs_inode_clear_eofblocks_tag(ip); + return 0; + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { ASSERT(xfs_is_shutdown(mp)); @@ -1048,7 +1062,7 @@ xfs_prepare_shift( * Trim eofblocks to avoid shifting uninitialized post-eof preallocation * into the accessible region of the file. */ - if (xfs_can_free_eofblocks(ip, true)) { + if (xfs_can_free_eofblocks(ip)) { error = xfs_free_eofblocks(ip); if (error) return error; diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 6888078f5c31..1383019ccdb7 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, xfs_off_t len); /* EOF block manipulation functions */ -bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); +bool xfs_can_free_eofblocks(struct xfs_inode *ip); int xfs_free_eofblocks(struct xfs_inode *ip); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 6df826fc787c..586d26c05160 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1186,7 +1186,7 @@ xfs_inode_free_eofblocks( } *lockflags |= XFS_IOLOCK_EXCL; - if (xfs_can_free_eofblocks(ip, false)) + if (xfs_can_free_eofblocks(ip)) return xfs_free_eofblocks(ip); /* inode could be preallocated or append-only */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 26961b0dae03..b26d26d29273 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1459,7 +1459,7 @@ xfs_release( if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) return 0; - if (xfs_can_free_eofblocks(ip, false)) { + if (xfs_can_free_eofblocks(ip)) { /* * Check if the inode is being opened, written and closed * frequently and we have delayed allocation blocks outstanding @@ -1675,15 +1675,13 @@ xfs_inode_needs_inactive( /* * This file isn't being freed, so check if there are post-eof blocks - * to free. @force is true because we are evicting an inode from the - * cache. Post-eof blocks must be freed, lest we end up with broken - * free space accounting. + * to free. * * Note: don't bother with iolock here since lockdep complains about * acquiring it in reclaim context. We have the only reference to the * inode at this point anyways. */ - return xfs_can_free_eofblocks(ip, true); + return xfs_can_free_eofblocks(ip); } /* @@ -1734,15 +1732,11 @@ xfs_inactive( if (VFS_I(ip)->i_nlink != 0) { /* - * force is true because we are evicting an inode from the - * cache. Post-eof blocks must be freed, lest we end up with - * broken free space accounting. - * * Note: don't bother with iolock here since lockdep complains * about acquiring it in reclaim context. We have the only * reference to the inode at this point anyways. */ - if (xfs_can_free_eofblocks(ip, true)) + if (xfs_can_free_eofblocks(ip)) error = xfs_free_eofblocks(ip); goto out; From 6724a3faa8ebea07970baf5c33acf1d957bb1413 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:27:02 -0700 Subject: [PATCH 37/97] xfs: allow unlinked symlinks and dirs with zero size [ Upstream commit 1ec9307fc066dd8a140d5430f8a7576aa9d78cd3 ] For a very very long time, inode inactivation has set the inode size to zero before unmapping the extents associated with the data fork. Unfortunately, commit 3c6f46eacd876 changed the inode verifier to prohibit zero-length symlinks and directories. If an inode happens to get logged in this state and the system crashes before freeing the inode, log recovery will also fail on the broken inode. Therefore, allow zero-size symlinks and directories as long as the link count is zero; nobody will be able to open these files by handle so there isn't any risk of data exposure. Fixes: 3c6f46eacd876 ("xfs: sanity check directory inode di_size") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_inode_buf.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 127c9a698d9f..3c611c8ac158 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -370,10 +370,13 @@ xfs_dinode_verify_fork( /* * A directory small enough to fit in the inode must be stored * in local format. The directory sf <-> extents conversion - * code updates the directory size accordingly. + * code updates the directory size accordingly. Directories + * being truncated have zero size and are not subject to this + * check. */ if (S_ISDIR(mode)) { - if (be64_to_cpu(dip->di_size) <= fork_size && + if (dip->di_size && + be64_to_cpu(dip->di_size) <= fork_size && fork_format != XFS_DINODE_FMT_LOCAL) return __this_address; } @@ -511,9 +514,19 @@ xfs_dinode_verify( if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) return __this_address; - /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) - return __this_address; + /* + * No zero-length symlinks/dirs unless they're unlinked and hence being + * inactivated. + */ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) { + if (dip->di_version > 1) { + if (dip->di_nlink) + return __this_address; + } else { + if (dip->di_onlink) + return __this_address; + } + } fa = xfs_dinode_verify_nrext64(mp, dip); if (fa) From 227bda0fd069986fadbef493ade6770fca580694 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Apr 2025 14:27:03 -0700 Subject: [PATCH 38/97] xfs: restrict when we try to align cow fork delalloc to cowextsz hints [ Upstream commit 288e1f693f04e66be99f27e7cbe4a45936a66745 ] xfs/205 produces the following failure when always_cow is enabled: # --- a/tests/xfs/205.out 2024-02-28 16:20:24.437887970 -0800 # +++ b/tests/xfs/205.out.bad 2024-06-03 21:13:40.584000000 -0700 # @@ -1,4 +1,5 @@ # QA output created by 205 # *** one file # + !!! disk full (expected) # *** one file, a few bytes at a time # *** done This is the result of overly aggressive attempts to align cow fork delalloc reservations to the CoW extent size hint. Looking at the trace data, we're trying to append a single fsblock to the "fred" file. Trying to create a speculative post-eof reservation fails because there's not enough space. We then set @prealloc_blocks to zero and try again, but the cowextsz alignment code triggers, which expands our request for a 1-fsblock reservation into a 39-block reservation. There's not enough space for that, so the whole write fails with ENOSPC even though there's sufficient space in the filesystem to allocate the single block that we need to land the write. There are two things wrong here -- first, we shouldn't be attempting speculative preallocations beyond what was requested when we're low on space. Second, if we've already computed a posteof preallocation, we shouldn't bother trying to align that to the cowextsize hint. Fix both of these problems by adding a flag that only enables the expansion of the delalloc reservation to the cowextsize if we're doing a non-extending write, and only if we're not doing an ENOSPC retry. This requires us to move the ENOSPC retry logic to xfs_bmapi_reserve_delalloc. I probably should have caught this six years ago when 6ca30729c206d was being reviewed, but oh well. Update the comments to reflect what the code does now. Fixes: 6ca30729c206d ("xfs: bmap code cleanup") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 31 +++++++++++++++++++++++++++---- fs/xfs/xfs_iomap.c | 34 ++++++++++++---------------------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a3c4d4a442af..14b0d230f61b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3959,20 +3959,32 @@ xfs_bmapi_reserve_delalloc( xfs_extlen_t alen; xfs_extlen_t indlen; int error; - xfs_fileoff_t aoff = off; + xfs_fileoff_t aoff; + bool use_cowextszhint = + whichfork == XFS_COW_FORK && !prealloc; +retry: /* * Cap the alloc length. Keep track of prealloc so we know whether to * tag the inode before we return. */ + aoff = off; alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); if (prealloc && alen >= len) prealloc = alen - len; - /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) { + /* + * If we're targetting the COW fork but aren't creating a speculative + * posteof preallocation, try to expand the reservation to align with + * the COW extent size hint if there's sufficient free space. + * + * Unlike the data fork, the CoW cancellation functions will free all + * the reservations at inactivation, so we don't require that every + * delalloc reservation have a dirty pagecache. + */ + if (use_cowextszhint) { struct xfs_bmbt_irec prev; xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); @@ -3991,7 +4003,7 @@ xfs_bmapi_reserve_delalloc( */ error = xfs_quota_reserve_blkres(ip, alen); if (error) - return error; + goto out; /* * Split changing sb for alen and indlen since they could be coming @@ -4036,6 +4048,17 @@ out_unreserve_blocks: out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) xfs_quota_unreserve_blkres(ip, alen); +out: + if (error == -ENOSPC || error == -EDQUOT) { + trace_xfs_delalloc_enospc(ip, off, len); + + if (prealloc || use_cowextszhint) { + /* retry without any preallocation */ + use_cowextszhint = false; + prealloc = 0; + goto retry; + } + } return error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fab191a09442..28a1c19dfdb3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1115,33 +1115,23 @@ xfs_buffered_write_iomap_begin( } } -retry: - error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, - end_fsb - offset_fsb, prealloc_blocks, - allocfork == XFS_DATA_FORK ? &imap : &cmap, - allocfork == XFS_DATA_FORK ? &icur : &ccur, - allocfork == XFS_DATA_FORK ? eof : cow_eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ - trace_xfs_delalloc_enospc(ip, offset, count); - if (prealloc_blocks) { - prealloc_blocks = 0; - goto retry; - } - fallthrough; - default: - goto out_unlock; - } - if (allocfork == XFS_COW_FORK) { + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, &cmap, + &ccur, cow_eof); + if (error) + goto out_unlock; + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap); goto found_cow; } + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, &imap, &icur, + eof); + if (error) + goto out_unlock; + /* * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. From 93eeb6df1605b3a24f38afdba7ab903ba6b64133 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 24 Jan 2025 17:18:33 -0800 Subject: [PATCH 39/97] KVM: x86: Load DR6 with guest value only before entering .vcpu_run() loop commit c2fee09fc167c74a64adb08656cb993ea475197e upstream. Move the conditional loading of hardware DR6 with the guest's DR6 value out of the core .vcpu_run() loop to fix a bug where KVM can load hardware with a stale vcpu->arch.dr6. When the guest accesses a DR and host userspace isn't debugging the guest, KVM disables DR interception and loads the guest's values into hardware on VM-Enter and saves them on VM-Exit. This allows the guest to access DRs at will, e.g. so that a sequence of DR accesses to configure a breakpoint only generates one VM-Exit. For DR0-DR3, the logic/behavior is identical between VMX and SVM, and also identical between KVM_DEBUGREG_BP_ENABLED (userspace debugging the guest) and KVM_DEBUGREG_WONT_EXIT (guest using DRs), and so KVM handles loading DR0-DR3 in common code, _outside_ of the core kvm_x86_ops.vcpu_run() loop. But for DR6, the guest's value doesn't need to be loaded into hardware for KVM_DEBUGREG_BP_ENABLED, and SVM provides a dedicated VMCB field whereas VMX requires software to manually load the guest value, and so loading the guest's value into DR6 is handled by {svm,vmx}_vcpu_run(), i.e. is done _inside_ the core run loop. Unfortunately, saving the guest values on VM-Exit is initiated by common x86, again outside of the core run loop. If the guest modifies DR6 (in hardware, when DR interception is disabled), and then the next VM-Exit is a fastpath VM-Exit, KVM will reload hardware DR6 with vcpu->arch.dr6 and clobber the guest's actual value. The bug shows up primarily with nested VMX because KVM handles the VMX preemption timer in the fastpath, and the window between hardware DR6 being modified (in guest context) and DR6 being read by guest software is orders of magnitude larger in a nested setup. E.g. in non-nested, the VMX preemption timer would need to fire precisely between #DB injection and the #DB handler's read of DR6, whereas with a KVM-on-KVM setup, the window where hardware DR6 is "dirty" extends all the way from L1 writing DR6 to VMRESUME (in L1). L1's view: ========== CPU 0/KVM-7289 [023] d.... 2925.640961: kvm_entry: vcpu 0 A: L1 Writes DR6 CPU 0/KVM-7289 [023] d.... 2925.640963: : Set DRs, DR6 = 0xffff0ff1 B: CPU 0/KVM-7289 [023] d.... 2925.640967: kvm_exit: vcpu 0 reason EXTERNAL_INTERRUPT intr_info 0x800000ec D: L1 reads DR6, arch.dr6 = 0 CPU 0/KVM-7289 [023] d.... 2925.640969: : Sync DRs, DR6 = 0xffff0ff0 CPU 0/KVM-7289 [023] d.... 2925.640976: kvm_entry: vcpu 0 L2 reads DR6, L1 disables DR interception CPU 0/KVM-7289 [023] d.... 2925.640980: kvm_exit: vcpu 0 reason DR_ACCESS info1 0x0000000000000216 CPU 0/KVM-7289 [023] d.... 2925.640983: kvm_entry: vcpu 0 CPU 0/KVM-7289 [023] d.... 2925.640983: : Set DRs, DR6 = 0xffff0ff0 L2 detects failure CPU 0/KVM-7289 [023] d.... 2925.640987: kvm_exit: vcpu 0 reason HLT L1 reads DR6 (confirms failure) CPU 0/KVM-7289 [023] d.... 2925.640990: : Sync DRs, DR6 = 0xffff0ff0 L0's view: ========== L2 reads DR6, arch.dr6 = 0 CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216 CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216 L2 => L1 nested VM-Exit CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit_inject: reason: DR_ACCESS ext_inf1: 0x0000000000000216 CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_entry: vcpu 23 CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_exit: vcpu 23 reason VMREAD CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_entry: vcpu 23 CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason VMREAD CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_entry: vcpu 23 L1 writes DR7, L0 disables DR interception CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000007 CPU 23/KVM-5046 [001] d.... 3410.005613: kvm_entry: vcpu 23 L0 writes DR6 = 0 (arch.dr6) CPU 23/KVM-5046 [001] d.... 3410.005613: : Set DRs, DR6 = 0xffff0ff0 A: B: CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_exit: vcpu 23 reason PREEMPTION_TIMER CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_entry: vcpu 23 C: L0 writes DR6 = 0 (arch.dr6) CPU 23/KVM-5046 [001] d.... 3410.005614: : Set DRs, DR6 = 0xffff0ff0 L1 => L2 nested VM-Enter CPU 23/KVM-5046 [001] d.... 3410.005616: kvm_exit: vcpu 23 reason VMRESUME L0 reads DR6, arch.dr6 = 0 Reported-by: John Stultz Closes: https://lkml.kernel.org/r/CANDhNCq5_F3HfFYABqFGCA1bPd_%2BxgNj-iDQhH4tDk%2Bwi8iZZg%40mail.gmail.com Fixes: 375e28ffc0cf ("KVM: X86: Set host DR6 only on VMX and for KVM_DEBUGREG_WONT_EXIT") Fixes: d67668e9dd76 ("KVM: x86, SVM: isolate vcpu->arch.dr6 from vmcb->save.dr6") Cc: stable@vger.kernel.org Cc: Jim Mattson Tested-by: John Stultz Link: https://lore.kernel.org/r/20250125011833.3644371-1-seanjc@google.com Signed-off-by: Sean Christopherson [jth: Handled conflicts with kvm_x86_ops reshuffle] Signed-off-by: James Houghton Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 13 ++++++------- arch/x86/kvm/vmx/vmx.c | 11 +++++++---- arch/x86/kvm/x86.c | 3 +++ 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index abc07d004589..29bef25ac77c 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -47,6 +47,7 @@ KVM_X86_OP(set_idt) KVM_X86_OP(get_gdt) KVM_X86_OP(set_gdt) KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr6) KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9de3db4a32f8..eb06c2f68314 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1499,6 +1499,7 @@ struct kvm_x86_ops { void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d7d1b0f7073f..920e4a3583ab 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1920,11 +1920,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid = sd->next_asid++; } -static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) { - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = to_svm(vcpu)->vmcb; - if (svm->vcpu.arch.guest_state_protected) + if (vcpu->arch.guest_state_protected) return; if (unlikely(value != vmcb->save.dr6)) { @@ -4035,10 +4035,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * Run with all-zero DR6 unless needed, so that we can get the exact cause * of a #DB. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - svm_set_dr6(svm, vcpu->arch.dr6); - else - svm_set_dr6(svm, DR6_ACTIVE_LOW); + if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); @@ -4807,6 +4805,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 460ba48eb66c..6bd25401bc01 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5536,6 +5536,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } +static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ + lockdep_assert_irqs_disabled(); + set_debugreg(vcpu->arch.dr6, 6); +} + static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -7220,10 +7226,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } - /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - set_debugreg(vcpu->arch.dr6, 6); - /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug @@ -8168,6 +8170,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 580730b1e589..752fa116a261 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10841,6 +10841,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + static_call(kvm_x86_set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } From a99f5bf4f7197009859dbce14c12f8e2ce5a5a69 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 17 Apr 2025 11:07:38 +0800 Subject: [PATCH 40/97] dm-bufio: don't schedule in atomic context commit a3d8f0a7f5e8b193db509c7191fefeed3533fc44 upstream. A BUG was reported as below when CONFIG_DEBUG_ATOMIC_SLEEP and try_verify_in_tasklet are enabled. [ 129.444685][ T934] BUG: sleeping function called from invalid context at drivers/md/dm-bufio.c:2421 [ 129.444723][ T934] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 934, name: kworker/1:4 [ 129.444740][ T934] preempt_count: 201, expected: 0 [ 129.444756][ T934] RCU nest depth: 0, expected: 0 [ 129.444781][ T934] Preemption disabled at: [ 129.444789][ T934] [] shrink_work+0x21c/0x248 [ 129.445167][ T934] kernel BUG at kernel/sched/walt/walt_debug.c:16! [ 129.445183][ T934] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 129.445204][ T934] Skip md ftrace buffer dump for: 0x1609e0 [ 129.447348][ T934] CPU: 1 PID: 934 Comm: kworker/1:4 Tainted: G W OE 6.6.56-android15-8-o-g6f82312b30b9-debug #1 1400000003000000474e5500b3187743670464e8 [ 129.447362][ T934] Hardware name: Qualcomm Technologies, Inc. Parrot QRD, Alpha-M (DT) [ 129.447373][ T934] Workqueue: dm_bufio_cache shrink_work [ 129.447394][ T934] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 129.447406][ T934] pc : android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug] [ 129.447435][ T934] lr : __traceiter_android_rvh_schedule_bug+0x44/0x6c [ 129.447451][ T934] sp : ffffffc0843dbc90 [ 129.447459][ T934] x29: ffffffc0843dbc90 x28: ffffffffffffffff x27: 0000000000000c8b [ 129.447479][ T934] x26: 0000000000000040 x25: ffffff804b3d6260 x24: ffffffd816232b68 [ 129.447497][ T934] x23: ffffff805171c5b4 x22: 0000000000000000 x21: ffffffd816231900 [ 129.447517][ T934] x20: ffffff80306ba898 x19: 0000000000000000 x18: ffffffc084159030 [ 129.447535][ T934] x17: 00000000d2b5dd1f x16: 00000000d2b5dd1f x15: ffffffd816720358 [ 129.447554][ T934] x14: 0000000000000004 x13: ffffff89ef978000 x12: 0000000000000003 [ 129.447572][ T934] x11: ffffffd817a823c4 x10: 0000000000000202 x9 : 7e779c5735de9400 [ 129.447591][ T934] x8 : ffffffd81560d004 x7 : 205b5d3938373434 x6 : ffffffd8167397c8 [ 129.447610][ T934] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffffffc0843db9e0 [ 129.447629][ T934] x2 : 0000000000002f15 x1 : 0000000000000000 x0 : 0000000000000000 [ 129.447647][ T934] Call trace: [ 129.447655][ T934] android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug 1400000003000000474e550080cce8a8a78606b6] [ 129.447681][ T934] __might_resched+0x190/0x1a8 [ 129.447694][ T934] shrink_work+0x180/0x248 [ 129.447706][ T934] process_one_work+0x260/0x624 [ 129.447718][ T934] worker_thread+0x28c/0x454 [ 129.447729][ T934] kthread+0x118/0x158 [ 129.447742][ T934] ret_from_fork+0x10/0x20 [ 129.447761][ T934] Code: ???????? ???????? ???????? d2b5dd1f (d4210000) [ 129.447772][ T934] ---[ end trace 0000000000000000 ]--- dm_bufio_lock will call spin_lock_bh when try_verify_in_tasklet is enabled, and __scan will be called in atomic context. Fixes: 7cd326747f46 ("dm bufio: remove dm_bufio_cond_resched()") Signed-off-by: LongPing Wei Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index ec662f97ba82..496f9176113f 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1685,7 +1685,8 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; } - cond_resched(); + if (!(static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)) + cond_resched(); } } } From 92b0c8414ca15e3aafc46a740ac9dff2759c9166 Mon Sep 17 00:00:00 2001 From: Sheetal Date: Fri, 4 Apr 2025 10:59:53 +0000 Subject: [PATCH 41/97] ASoC: soc-pcm: Fix hw_params() and DAPM widget sequence [ Upstream commit 9aff2e8df240e84a36f2607f98a0a9924a24e65d ] Issue: When multiple audio streams share a common BE DAI, the BE DAI widget can be powered up before its hardware parameters are configured. This incorrect sequence leads to intermittent pcm_write errors. For example, the below Tegra use-case throws an error: aplay(2 streams) -> AMX(mux) -> ADX(demux) -> arecord(2 streams), here, 'AMX TX' and 'ADX RX' are common BE DAIs. For above usecase when failure happens below sequence is observed: aplay(1) FE open() - BE DAI callbacks added to the list - BE DAI state = SND_SOC_DPCM_STATE_OPEN aplay(2) FE open() - BE DAI callbacks are not added to the list as the state is already SND_SOC_DPCM_STATE_OPEN during aplay(1) FE open(). aplay(2) FE hw_params() - BE DAI hw_params() callback ignored aplay(2) FE prepare() - Widget is powered ON without BE DAI hw_params() call aplay(1) FE hw_params() - BE DAI hw_params() is now called Fix: Add BE DAIs in the list if its state is either SND_SOC_DPCM_STATE_OPEN or SND_SOC_DPCM_STATE_HW_PARAMS as well. It ensures the widget is powered ON after BE DAI hw_params() callback. Fixes: 0c25db3f7621 ("ASoC: soc-pcm: Don't reconnect an already active BE") Signed-off-by: Sheetal Link: https://patch.msgid.link/20250404105953.2784819-1-sheetal@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 3f998a09fc42..5a0fec90ae25 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1499,10 +1499,13 @@ static int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* * Filter for systems with 'component_chaining' enabled. * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. + * already active BE on such systems and ensures the BE DAI + * widget is powered ON after hw_params() BE DAI callback. */ if (fe->card->component_chaining && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) continue; From 93d646911be1e5be20d4f5d6c48359464cef0097 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 21 Mar 2025 21:52:25 +0300 Subject: [PATCH 42/97] wifi: plfxlc: Remove erroneous assert in plfxlc_mac_release [ Upstream commit 0fb15ae3b0a9221be01715dac0335647c79f3362 ] plfxlc_mac_release() asserts that mac->lock is held. This assertion is incorrect, because even if it was possible, it would not be the valid behaviour. The function is used when probe fails or after the device is disconnected. In both cases mac->lock can not be held as the driver is not working with the device at the moment. All functions that use mac->lock unlock it just after it was held. There is also no need to hold mac->lock for plfxlc_mac_release() itself, as mac data is not affected, except for mac->flags, which is modified atomically. This bug leads to the following warning: ================================================================ WARNING: CPU: 0 PID: 127 at drivers/net/wireless/purelifi/plfxlc/mac.c:106 plfxlc_mac_release+0x7d/0xa0 Modules linked in: CPU: 0 PID: 127 Comm: kworker/0:2 Not tainted 6.1.124-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: usb_hub_wq hub_event RIP: 0010:plfxlc_mac_release+0x7d/0xa0 drivers/net/wireless/purelifi/plfxlc/mac.c:106 Call Trace: probe+0x941/0xbd0 drivers/net/wireless/purelifi/plfxlc/usb.c:694 usb_probe_interface+0x5c0/0xaf0 drivers/usb/core/driver.c:396 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_set_configuration+0x19dd/0x2020 drivers/usb/core/message.c:2165 usb_generic_driver_probe+0x84/0x140 drivers/usb/core/generic.c:238 usb_probe_device+0x130/0x260 drivers/usb/core/driver.c:293 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_new_device+0xbdd/0x18f0 drivers/usb/core/hub.c:2620 hub_port_connect drivers/usb/core/hub.c:5477 [inline] hub_port_connect_change drivers/usb/core/hub.c:5617 [inline] port_event drivers/usb/core/hub.c:5773 [inline] hub_event+0x2efe/0x5730 drivers/usb/core/hub.c:5855 process_one_work+0x8a9/0x11d0 kernel/workqueue.c:2292 worker_thread+0xa47/0x1200 kernel/workqueue.c:2439 kthread+0x28d/0x320 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 ================================================================ Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 68d57a07bfe5 ("wireless: add plfxlc driver for pureLiFi X, XL, XC devices") Reported-by: syzbot+7d4f142f6c288de8abfe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7d4f142f6c288de8abfe Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250321185226.71-2-m.masimov@mt-integration.ru Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/purelifi/plfxlc/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index 87a4ff888ddd..70d6f5244e5e 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -103,7 +103,6 @@ int plfxlc_mac_init_hw(struct ieee80211_hw *hw) void plfxlc_mac_release(struct plfxlc_mac *mac) { plfxlc_chip_release(&mac->chip); - lockdep_assert_held(&mac->lock); } int plfxlc_op_start(struct ieee80211_hw *hw) From 2d4a121296aa3940d2df9906f955c2b6b4e38bc3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 23 Apr 2025 17:51:31 +0300 Subject: [PATCH 43/97] vxlan: vnifilter: Fix unlocked deletion of default FDB entry [ Upstream commit 087a9eb9e5978e3ba362e1163691e41097e8ca20 ] When a VNI is deleted from a VXLAN device in 'vnifilter' mode, the FDB entry associated with the default remote (assuming one was configured) is deleted without holding the hash lock. This is wrong and will result in a warning [1] being generated by the lockdep annotation that was added by commit ebe642067455 ("vxlan: Create wrappers for FDB lookup"). Reproducer: # ip link add vx0 up type vxlan dstport 4789 external vnifilter local 192.0.2.1 # bridge vni add vni 10010 remote 198.51.100.1 dev vx0 # bridge vni del vni 10010 dev vx0 Fix by acquiring the hash lock before the deletion and releasing it afterwards. Blame the original commit that introduced the issue rather than the one that exposed it. [1] WARNING: CPU: 3 PID: 392 at drivers/net/vxlan/vxlan_core.c:417 vxlan_find_mac+0x17f/0x1a0 [...] RIP: 0010:vxlan_find_mac+0x17f/0x1a0 [...] Call Trace: __vxlan_fdb_delete+0xbe/0x560 vxlan_vni_delete_group+0x2ba/0x940 vxlan_vni_del.isra.0+0x15f/0x580 vxlan_process_vni_filter+0x38b/0x7b0 vxlan_vnifilter_process+0x3bb/0x510 rtnetlink_rcv_msg+0x2f7/0xb70 netlink_rcv_skb+0x131/0x360 netlink_unicast+0x426/0x710 netlink_sendmsg+0x75a/0xc20 __sock_sendmsg+0xc1/0x150 ____sys_sendmsg+0x5aa/0x7b0 ___sys_sendmsg+0xfc/0x180 __sys_sendmsg+0x121/0x1b0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250423145131.513029-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_vnifilter.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 1ffc00e27080..c6d4fae958ca 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -627,7 +627,11 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, * default dst remote_ip previously added for this vni */ if (!vxlan_addr_any(&vninode->remote_ip) || - !vxlan_addr_any(&dst->remote_ip)) + !vxlan_addr_any(&dst->remote_ip)) { + u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, + vninode->vni); + + spin_lock_bh(&vxlan->hash_lock[hash_index]); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), @@ -635,6 +639,8 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, vninode->vni, vninode->vni, dst->remote_ifindex, true); + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + } if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && From 455a5261c98a273d1765910a377d18f0a4062c86 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 23 Apr 2025 11:36:08 +0300 Subject: [PATCH 44/97] net/mlx5: E-Switch, Initialize MAC Address for Default GID [ Upstream commit 5d1a04f347e6cbf5ffe74da409a5d71fbe8c5f19 ] Initialize the source MAC address when creating the default GID entry. Since this entry is used only for loopback traffic, it only needs to be a unicast address. A zeroed-out MAC address is sufficient for this purpose. Without this fix, random bits would be assigned as the source address. If these bits formed a multicast address, the firmware would return an error, preventing the user from switching to switchdev mode: Error: mlx5_core: Failed setting eswitch to offloads. kernel answers: Invalid argument Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 540cf05f6373..ab5afa6c5e0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -130,8 +130,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) { + u8 mac[ETH_ALEN] = {}; union ib_gid gid; - u8 mac[ETH_ALEN]; mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, From 901346b67301ec3559a79c8418689dc573589e28 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 23 Apr 2025 11:36:11 +0300 Subject: [PATCH 45/97] net/mlx5: E-switch, Fix error handling for enabling roce [ Upstream commit 90538d23278a981e344d364e923162fce752afeb ] The cited commit assumes enabling roce always succeeds. But it is not true. Add error handling for it. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/rdma.h | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 433cdd0a2cf3..5237abbdcda1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3320,7 +3320,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); + err = mlx5_rdma_enable_roce(esw->dev); + if (err) + goto err_roce; err = mlx5_esw_host_number_init(esw); if (err) @@ -3378,6 +3380,7 @@ err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: mlx5_rdma_disable_roce(esw->dev); +err_roce: mutex_destroy(&esw->offloads.termtbl_mutex); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index ab5afa6c5e0f..e61a4fa46d77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -152,17 +152,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) mlx5_nic_vport_disable_roce(dev); } -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; if (!MLX5_CAP_GEN(dev, roce)) - return; + return 0; err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); - return; + return err; } err = mlx5_rdma_add_roce_addr(dev); @@ -177,10 +177,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) goto del_roce_addr; } - return; + return err; del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h index 750cff2a71a4..3d9e76c3d42f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -8,12 +8,12 @@ #ifdef CONFIG_MLX5_ESWITCH -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ From 2ab2780146c5c383a537525b8fbd1597a3e17281 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:07 +0300 Subject: [PATCH 46/97] net: mscc: ocelot: treat 802.1ad tagged traffic as 802.1Q-untagged [ Upstream commit 36dd1141be70b5966906919714dc504a24c65ddf ] I was revisiting the topic of 802.1ad treatment in the Ocelot switch [0] and realized that not only is its basic VLAN classification pipeline improper for offloading vlan_protocol 802.1ad bridges, but also improper for offloading regular 802.1Q bridges already. Namely, 802.1ad-tagged traffic should be treated as VLAN-untagged by bridged ports, but this switch treats it as if it was 802.1Q-tagged with the same VID as in the 802.1ad header. This is markedly different to what the Linux bridge expects; see the "other_tpid()" function in tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh. An idea came to me that the VCAP IS1 TCAM is more powerful than I'm giving it credit for, and that it actually overwrites the classified VID before the VLAN Table lookup takes place. In other words, it can be used even to save a packet from being dropped on ingress due to VLAN membership. Add a sophisticated TCAM rule hardcoded into the driver to force the switch to behave like a Linux bridge with vlan_filtering 1 vlan_protocol 802.1Q. Regarding the lifetime of the filter: eventually the bridge will disappear, and vlan_filtering on the port will be restored to 0 for standalone mode. Then the filter will be deleted. [0]: https://lore.kernel.org/netdev/20201009122947.nvhye4hvcha3tljh@skbuf/ Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller Stable-dep-of: 5ec6d7d737a4 ("net: mscc: ocelot: delete PVID VLAN when readding it as non-PVID") Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 188 ++++++++++++++++++++++-- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 + include/soc/mscc/ocelot_vcap.h | 2 + 3 files changed, 180 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 71dbdac38020..ec644a201b8e 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -416,9 +416,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on C-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. + * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. + */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -438,15 +587,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. */ if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -594,7 +751,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -647,9 +807,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -675,8 +838,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -2502,7 +2668,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a3..5734b86aed5b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { From 4195bd195ae1e659cb422d0414f628a6575e675f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:33 +0300 Subject: [PATCH 47/97] net: mscc: ocelot: delete PVID VLAN when readding it as non-PVID [ Upstream commit 5ec6d7d737a491256cd37e33910f7ac1978db591 ] The following set of commands: ip link add br0 type bridge vlan_filtering 1 # vlan_default_pvid 1 is implicit ip link set swp0 master br0 bridge vlan add dev swp0 vid 1 should result in the dropping of untagged and 802.1p-tagged traffic, but we see that it continues to be accepted. Whereas, had we deleted VID 1 instead, the aforementioned dropping would have worked This is because the ANA_PORT_DROP_CFG update logic doesn't run, because ocelot_vlan_add() only calls ocelot_port_set_pvid() if the new VLAN has the BRIDGE_VLAN_INFO_PVID flag. Similar to other drivers like mt7530_port_vlan_add() which handle this case correctly, we need to test whether the VLAN we're changing used to have the BRIDGE_VLAN_INFO_PVID flag, but lost it now. That amounts to a PVID deletion and should be treated as such. Regarding blame attribution: this never worked properly since the introduction of bridge VLAN filtering in commit 7142529f1688 ("net: mscc: ocelot: add VLAN filtering"). However, there was a significant paradigm shift which aligned the ANA_PORT_DROP_CFG register with the PVID concept rather than with the native VLAN concept, and that change wasn't targeted for 'stable'. Realistically, that is as far as this fix needs to be propagated to. Fixes: be0576fed6d3 ("net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250424223734.3096202-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ec644a201b8e..203cb4978544 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -793,6 +793,7 @@ EXPORT_SYMBOL(ocelot_vlan_prepare); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; /* Ignore VID 0 added to our RX filter by the 8021q module, since @@ -812,6 +813,11 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, ocelot_bridge_vlan_find(ocelot, vid)); if (err) return err; + } else if (ocelot_port->pvid_vlan && + ocelot_bridge_vlan_find(ocelot, vid) == ocelot_port->pvid_vlan) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; } /* Untagged egress vlan clasification */ From 94107259f972d2fd896dbbcaa176b3b2451ff9e5 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:48 +0200 Subject: [PATCH 48/97] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll [ Upstream commit 6fe0866014486736cc3ba1c6fd4606d3dbe55c9c ] Use spin_lock_irqsave and spin_unlock_irqrestore instead of spin_lock and spin_unlock in mtk_star_emac driver to avoid spinlock recursion occurrence that can happen when enabling the DMA interrupts again in rx/tx poll. ``` BUG: spinlock recursion on CPU#0, swapper/0/0 lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT Hardware name: MediaTek MT8365 Open Platform EVK (DT) Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x60/0x80 dump_stack+0x18/0x24 spin_dump+0x78/0x88 do_raw_spin_lock+0x11c/0x120 _raw_spin_lock+0x20/0x2c mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac] __handle_irq_event_percpu+0x48/0x140 handle_irq_event+0x4c/0xb0 handle_fasteoi_irq+0xa0/0x1bc handle_irq_desc+0x34/0x58 generic_handle_domain_irq+0x1c/0x28 gic_handle_irq+0x4c/0x120 do_interrupt_handler+0x50/0x84 el1_interrupt+0x34/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 regmap_mmio_read32le+0xc/0x20 (P) _regmap_bus_reg_read+0x6c/0xac _regmap_read+0x60/0xdc regmap_read+0x4c/0x80 mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac] __napi_poll+0x38/0x188 net_rx_action+0x164/0x2c0 handle_softirqs+0x100/0x244 __do_softirq+0x14/0x20 ____do_softirq+0x10/0x20 call_on_irq_stack+0x24/0x64 do_softirq_own_stack+0x1c/0x40 __irq_exit_rcu+0xd4/0x10c irq_exit_rcu+0x10/0x1c el1_interrupt+0x38/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 cpuidle_enter_state+0xac/0x320 (P) cpuidle_enter+0x38/0x50 do_idle+0x1e4/0x260 cpu_startup_entry+0x34/0x3c rest_init+0xdc/0xe0 console_on_rootfs+0x0/0x6c __primary_switched+0x88/0x90 ``` Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx handling with two NAPIs") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: Maxime Chevallier Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-1-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index ad27749c0931..fd729469b29f 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1164,6 +1164,7 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) struct net_device *ndev = priv->ndev; unsigned int head = ring->head; unsigned int entry = ring->tail; + unsigned long flags; while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) { ret = mtk_star_tx_complete_one(priv); @@ -1183,9 +1184,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) netif_wake_queue(ndev); if (napi_complete(napi)) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, false, true); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return 0; @@ -1342,6 +1343,7 @@ push_new_skb: static int mtk_star_rx_poll(struct napi_struct *napi, int budget) { struct mtk_star_priv *priv; + unsigned long flags; int work_done = 0; priv = container_of(napi, struct mtk_star_priv, rx_napi); @@ -1349,9 +1351,9 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) work_done = mtk_star_rx(priv, budget); if (work_done < budget) { napi_complete_done(napi, work_done); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return work_done; From eaf24941380160b4e823427a3b0371c50c8bc000 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:49 +0200 Subject: [PATCH 49/97] net: ethernet: mtk-star-emac: rearm interrupts in rx_poll only when advised [ Upstream commit e54b4db35e201a9173da9cb7abc8377e12abaf87 ] In mtk_star_rx_poll function, on event processing completion, the mtk_star_emac driver calls napi_complete_done but ignores its return code and enable RX DMA interrupts inconditionally. This return code gives the info if a device should avoid rearming its interrupts or not, so fix this behaviour by taking it into account. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Louis-Alexis Eyraud Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-2-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index fd729469b29f..c42e9f741f95 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1349,8 +1349,7 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) priv = container_of(napi, struct mtk_star_priv, rx_napi); work_done = mtk_star_rx(priv, budget); - if (work_done < budget) { - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); spin_unlock_irqrestore(&priv->lock, flags); From 4f0ecf50cdf76da95828578a92f130b653ac2fcf Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:05 -0300 Subject: [PATCH 50/97] net_sched: drr: Fix double list add in class with netem as child qdisc [ Upstream commit f99a3fbf023e20b626be4b0f042463d598050c9a ] As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of drr, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before adding to the list to cover for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_drr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e35a4e90f4e6..b35d6086a972 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -36,6 +36,11 @@ struct drr_sched { struct Qdisc_class_hash clhash; }; +static bool cl_is_active(struct drr_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) { struct drr_sched *q = qdisc_priv(sch); @@ -335,7 +340,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; - bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -345,7 +349,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -355,7 +358,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first) { + if (!cl_is_active(cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From 8df7d37d626430035b413b97cee18396b3450bef Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:06 -0300 Subject: [PATCH 51/97] net_sched: hfsc: Fix a UAF vulnerability in class with netem as child qdisc [ Upstream commit 141d34391abbb315d68556b7c67ad97885407547 ] As described in Gerrard's report [1], we have a UAF case when an hfsc class has a netem child qdisc. The crux of the issue is that hfsc is assuming that checking for cl->qdisc->q.qlen == 0 guarantees that it hasn't inserted the class in the vttree or eltree (which is not true for the netem duplicate case). This patch checks the n_active class variable to make sure that the code won't insert the class in the vttree or eltree twice, catering for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Reported-by: Gerrard Tai Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-3-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_hfsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index dbed490aafd3..4b9f323845b9 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1564,7 +1564,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (first) { + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) From 9efb6a0fa88e0910d079fdfeb4f7ce4d4ac6c990 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:07 -0300 Subject: [PATCH 52/97] net_sched: ets: Fix double list add in class with netem as child qdisc [ Upstream commit 1a6d0c00fa07972384b0c308c72db091d49988b6 ] As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of ets, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-4-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_ets.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 9fd70462b41d..18269f86d21e 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = { [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, }; +static bool cl_is_active(struct ets_class *cl) +{ + return !list_empty(&cl->alist); +} + static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr, unsigned int *quantum, struct netlink_ext_ack *extack) @@ -416,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct ets_sched *q = qdisc_priv(sch); struct ets_class *cl; int err = 0; - bool first; cl = ets_classify(skb, sch, &err); if (!cl) { @@ -426,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -436,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first && !ets_class_is_strict(q, cl)) { + if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From 0aa23e0856b7cedb3c88d8e3d281c212c7e4fbeb Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:08 -0300 Subject: [PATCH 53/97] net_sched: qfq: Fix double list add in class with netem as child qdisc [ Upstream commit f139f37dcdf34b67f5bf92bc8e0f7f6b3ac63aa4 ] As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of qfq, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. This patch checks whether the class was already added to the agg->active list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-5-victor@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_qfq.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index ed01634af82c..e6743e17408b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -204,6 +204,11 @@ struct qfq_sched { */ enum update_reason {enqueue, requeue}; +static bool cl_is_active(struct qfq_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); @@ -1216,7 +1221,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; - bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1238,7 +1242,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1254,8 +1257,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, ++sch->q.qlen; agg = cl->agg; - /* if the queue was not empty, then done here */ - if (!first) { + /* if the class is active, then done here */ + if (cl_is_active(cl)) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) From 0561f2e374c3732b90e50f0a244791a4308ec67e Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Fri, 25 Apr 2025 15:26:32 -0700 Subject: [PATCH 54/97] ice: Check VF VSI Pointer Value in ice_vc_add_fdir_fltr() [ Upstream commit 425c5f266b2edeee0ce16fedd8466410cdcfcfe3 ] As mentioned in the commit baeb705fd6a7 ("ice: always check VF VSI pointer values"), we need to perform a null pointer check on the return value of ice_get_vf_vsi() before using it. Fixes: 6ebbe97a4881 ("ice: Add a per-VF limit on number of FDIR filters") Signed-off-by: Xuanqiang Luo Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250425222636.3188441-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index bff3e9662a8f..a9df95088df3 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1811,6 +1811,11 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err_exit; + } #define ICE_VF_MAX_FDIR_FILTERS 128 if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || From 9f9c9d1c0858c3ac5dd62cc5e6d68050e2a3e601 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 25 Apr 2025 16:50:47 +0100 Subject: [PATCH 55/97] net: dlink: Correct endianness handling of led_mode [ Upstream commit e7e5ae71831c44d58627a991e603845a2fed2cab ] As it's name suggests, parse_eeprom() parses EEPROM data. This is done by reading data, 16 bits at a time as follows: for (i = 0; i < 128; i++) ((__le16 *) sromdata)[i] = cpu_to_le16(read_eeprom(np, i)); sromdata is at the same memory location as psrom. And the type of psrom is a pointer to struct t_SROM. As can be seen in the loop above, data is stored in sromdata, and thus psrom, as 16-bit little-endian values. However, the integer fields of t_SROM are host byte order integers. And in the case of led_mode this leads to a little endian value being incorrectly treated as host byte order. Looking at rio_set_led_mode, this does appear to be a bug as that code masks led_mode with 0x1, 0x2 and 0x8. Logic that would be effected by a reversed byte order. This problem would only manifest on big endian hosts. Found by inspection while investigating a sparse warning regarding the crc field of t_SROM. I believe that warning is a false positive. And although I plan to send a follow-up to use little-endian types for other the integer fields of PSROM_t I do not believe that will involve any bug fixes. Compile tested only. Fixes: c3f45d322cbd ("dl2k: Add support for IP1000A-based cards") Signed-off-by: Simon Horman Link: https://patch.msgid.link/20250425-dlink-led-mode-v1-1-6bae3c36e736@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/dl2k.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 2c67a857a42f..71cb7fe63de3 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -352,7 +352,7 @@ parse_eeprom (struct net_device *dev) eth_hw_addr_set(dev, psrom->mac_addr); if (np->chip_id == CHIP_IP1000A) { - np->led_mode = psrom->led_mode; + np->led_mode = le16_to_cpu(psrom->led_mode); return 0; } diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index 195dc6cfd895..0e33e2eaae96 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -335,7 +335,7 @@ typedef struct t_SROM { u16 sub_system_id; /* 0x06 */ u16 pci_base_1; /* 0x08 (IP1000A only) */ u16 pci_base_2; /* 0x0a (IP1000A only) */ - u16 led_mode; /* 0x0c (IP1000A only) */ + __le16 led_mode; /* 0x0c (IP1000A only) */ u16 reserved1[9]; /* 0x0e-0x1f */ u8 mac_addr[6]; /* 0x20-0x25 */ u8 reserved2[10]; /* 0x26-0x2f */ From f57fd07d7c5c4b9d13e7f6889f28f3cdff89e1f8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:55 +0300 Subject: [PATCH 56/97] net: dsa: felix: fix broken taprio gate states after clock jump [ Upstream commit 426d487bca38b34f39c483edfc6313a036446b33 ] Simplest setup to reproduce the issue: connect 2 ports of the LS1028A-RDB together (eno0 with swp0) and run: $ ip link set eno0 up && ip link set swp0 up $ tc qdisc replace dev swp0 parent root handle 100 taprio num_tc 8 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ sched-entry S 20 300000 sched-entry S 48 200000 \ sched-entry S 20 300000 sched-entry S 83 200000 \ sched-entry S 40 300000 sched-entry S 00 200000 flags 2 $ ptp4l -i eno0 -f /etc/linuxptp/configs/gPTP.cfg -m & $ ptp4l -i swp0 -f /etc/linuxptp/configs/gPTP.cfg -m One will observe that the PTP state machine on swp0 starts synchronizing, then it attempts to do a clock step, and after that, it never fails to recover from the condition below. ptp4l[82.427]: selected best master clock 00049f.fffe.05f627 ptp4l[82.428]: port 1 (swp0): MASTER to UNCALIBRATED on RS_SLAVE ptp4l[83.252]: port 1 (swp0): UNCALIBRATED to SLAVE on MASTER_CLOCK_SELECTED ptp4l[83.886]: rms 4537731277 max 9075462553 freq -18518 +/- 11467 delay 818 +/- 0 ptp4l[84.170]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[85.304]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[85.305]: port 1 (swp0): send peer delay response failed ptp4l[85.306]: port 1 (swp0): clearing fault immediately ptp4l[86.304]: timed out while polling for tx timestamp A hint is given by the non-zero statistics for dropped packets which were expecting hardware TX timestamps: $ ethtool --include-statistics -T swp0 (...) Statistics: tx_pkts: 30 tx_lost: 11 tx_err: 0 We know that when PTP clock stepping takes place (from ocelot_ptp_settime64() or from ocelot_ptp_adjtime()), vsc9959_tas_clock_adjust() is called. Another interesting hint is that placing an early return in vsc9959_tas_clock_adjust(), so as to neutralize this function, fixes the issue and TX timestamps are no longer dropped. The debugging function written by me and included below is intended to read the GCL RAM, after the admin schedule became operational, through the two status registers available for this purpose: QSYS_GCL_STATUS_REG_1 and QSYS_GCL_STATUS_REG_2. static void vsc9959_print_tas_gcl(struct ocelot *ocelot) { u32 val, list_length, interval, gate_state; int i, err; err = read_poll_timeout(ocelot_read, val, !(val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING), 10, 100000, false, ocelot, QSYS_PARAM_STATUS_REG_8); if (err) { dev_err(ocelot->dev, "Failed to wait for TAS config pending bit to clear: %pe\n", ERR_PTR(err)); return; } val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_3); list_length = QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(val); dev_info(ocelot->dev, "GCL length: %u\n", list_length); for (i = 0; i < list_length; i++) { ocelot_rmw(ocelot, QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(i), QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M, QSYS_GCL_STATUS_REG_1); interval = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_2); val = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_1); gate_state = QSYS_GCL_STATUS_REG_1_GATE_STATE_X(val); dev_info(ocelot->dev, "GCL entry %d: states 0x%x interval %u\n", i, gate_state, interval); } } Calling it from two places: after the initial QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE performed by vsc9959_qos_port_tas_set(), and after the one done by vsc9959_tas_clock_adjust(), I notice the following difference. From the tc-taprio process context, where the schedule was initially configured, the GCL looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 1: states 0x10 interval 200000 mscc_felix 0000:00:00.5: GCL entry 2: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 3: states 0x48 interval 200000 mscc_felix 0000:00:00.5: GCL entry 4: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 5: states 0x83 interval 200000 mscc_felix 0000:00:00.5: GCL entry 6: states 0x40 interval 300000 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 200000 But from the ptp4l clock stepping process context, when the vsc9959_tas_clock_adjust() hook is called, the GCL RAM of the operational schedule now looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 1: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 2: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 3: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 4: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 5: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 6: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 0 I do not have a formal explanation, just experimental conclusions. It appears that after triggering QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE for a port's TAS, the GCL entry RAM is updated anyway, despite what the documentation claims: "Specify the time interval in QSYS::GCL_CFG_REG_2.TIME_INTERVAL. This triggers the actual RAM write with the gate state and the time interval for the entry number specified". We don't touch that register (through vsc9959_tas_gcl_set()) from vsc9959_tas_clock_adjust(), yet the GCL RAM is updated anyway. It seems to be updated with effectively stale memory, which in my testing can hold a variety of things, including even pieces of the previously applied schedule, for particular schedule lengths. As such, in most circumstances it is very difficult to pinpoint this issue, because the newly updated schedule would "behave strangely", but ultimately might still pass traffic to some extent, due to some gate entries still being present in the stale GCL entry RAM. It is easy to miss. With the particular schedule given at the beginning, the GCL RAM "happens" to be reproducibly rewritten with all zeroes, and this is consistent with what we see: when the time-aware shaper has gate entries with all gates closed, traffic is dropped on TX, no wonder we can't retrieve TX timestamps. Rewriting the GCL entry RAM when reapplying the new base time fixes the observed issue. Fixes: 8670dc33f48b ("net: dsa: felix: update base time of time-aware shaper when adjusting PTP time") Reported-by: Richie Pearn Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/ocelot/felix_vsc9959.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 391c4e3cb66f..67af798686b8 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1517,7 +1517,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) struct tc_taprio_qopt_offload *taprio; struct ocelot_port *ocelot_port; struct timespec64 base_ts; - int port; + int i, port; u32 val; mutex_lock(&ocelot->tas_lock); @@ -1549,6 +1549,9 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, QSYS_PARAM_CFG_REG_3); + for (i = 0; i < taprio->num_entries; i++) + vsc9959_tas_gcl_set(ocelot, i, &taprio->entries[i]); + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL); From 68bfb59b3c6296f86539f64a486a769679202fcf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 26 Apr 2025 17:32:09 +0200 Subject: [PATCH 57/97] net: ipv6: fix UDPv6 GSO segmentation with NAT [ Upstream commit b936a9b8d4a585ccb6d454921c36286bfe63e01d ] If any address or port is changed, update it in all packets and recalculate checksum. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250426153210.14044-1-nbd@nbd.name Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/udp_offload.c | 61 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 2f1f038b0dc1..d415b4fb2f1f 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -246,6 +246,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) return segs; } +static void __udpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, + __be16 *oldport, __be16 newport) +{ + struct udphdr *uh = udp_hdr(seg); + + if (ipv6_addr_equal(oldip, newip) && *oldport == newport) + return; + + if (uh->check) { + inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32, + newip->s6_addr32, true); + + inet_proto_csum_replace2(&uh->check, seg, *oldport, newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + + *oldip = *newip; + *oldport = newport; +} + +static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct ipv6hdr *iph; + const struct udphdr *uh; + struct ipv6hdr *iph2; + struct sk_buff *seg; + struct udphdr *uh2; + + seg = segs; + uh = udp_hdr(seg); + iph = ipv6_hdr(seg); + uh2 = udp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ipv6_hdr(seg); + + __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &uh2->source, uh->source); + __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &uh2->dest, uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) @@ -258,7 +314,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); + if (is_ipv6) + return __udpv6_gso_segment_list_csum(skb); + else + return __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, From b1c2b19dcc1b37e947e058b6393cbebb816dd4f2 Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:01 -0700 Subject: [PATCH 58/97] bnxt_en: Fix coredump logic to free allocated buffer [ Upstream commit ea9376cf68230e05492f22ca45d329f16e262c7b ] When handling HWRM_DBG_COREDUMP_LIST FW command in bnxt_hwrm_dbg_dma_data(), the allocated buffer info->dest_buf is not freed in the error path. In the normal path, info->dest_buf is assigned to coredump->data and it will eventually be freed after the coredump is collected. Free info->dest_buf immediately inside bnxt_hwrm_dbg_dma_data() in the error path. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reported-by: Michael Chan Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index c06789882036..b57d2a25ae27 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -72,6 +72,11 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, memcpy(info->dest_buf + off, dma_buf, len); } else { rc = -ENOBUFS; + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { + kfree(info->dest_buf); + info->dest_buf = NULL; + } break; } } From 43292b83424158fa6ec458799f3cb9c54d18c484 Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:02 -0700 Subject: [PATCH 59/97] bnxt_en: Fix out-of-bound memcpy() during ethtool -w [ Upstream commit 6b87bd94f34370bbf1dfa59352bed8efab5bf419 ] When retrieving the FW coredump using ethtool, it can sometimes cause memory corruption: BUG: KFENCE: memory corruption in __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] Corrupted memory at 0x000000008f0f30e8 [ ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ] (in kfence-#45): __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] ethtool_get_dump_data+0xdc/0x1a0 __dev_ethtool+0xa1e/0x1af0 dev_ethtool+0xa8/0x170 dev_ioctl+0x1b5/0x580 sock_do_ioctl+0xab/0xf0 sock_ioctl+0x1ce/0x2e0 __x64_sys_ioctl+0x87/0xc0 do_syscall_64+0x5c/0xf0 entry_SYSCALL_64_after_hwframe+0x78/0x80 ... This happens when copying the coredump segment list in bnxt_hwrm_dbg_dma_data() with the HWRM_DBG_COREDUMP_LIST FW command. The info->dest_buf buffer is allocated based on the number of coredump segments returned by the FW. The segment list is then DMA'ed by the FW and the length of the DMA is returned by FW. The driver then copies this DMA'ed segment list to info->dest_buf. In some cases, this DMA length may exceed the info->dest_buf length and cause the above BUG condition. Fix it by capping the copy length to not exceed the length of info->dest_buf. The extra DMA data contains no useful information. This code path is shared for the HWRM_DBG_COREDUMP_LIST and the HWRM_DBG_COREDUMP_RETRIEVE FW commands. The buffering is different for these 2 FW commands. To simplify the logic, we need to move the line to adjust the buffer length for HWRM_DBG_COREDUMP_RETRIEVE up, so that the new check to cap the copy length will work for both commands. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnxt/bnxt_coredump.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index b57d2a25ae27..32813cdd5aa5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -66,10 +66,19 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) + info->dest_buf_size += len; + if (info->dest_buf) { if ((info->seg_start + off + len) <= BNXT_COREDUMP_BUF_LEN(info->buf_len)) { - memcpy(info->dest_buf + off, dma_buf, len); + u16 copylen = min_t(u16, len, + info->dest_buf_size - off); + + memcpy(info->dest_buf + off, dma_buf, copylen); + if (copylen < len) + break; } else { rc = -ENOBUFS; if (cmn_req->req_type == @@ -81,10 +90,6 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } - if (cmn_req->req_type == - cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) - info->dest_buf_size += len; - if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) break; From 2922716d944caf59953c74b424b24784220d2781 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Apr 2025 15:59:03 -0700 Subject: [PATCH 60/97] bnxt_en: Fix ethtool -d byte order for 32-bit values [ Upstream commit 02e8be5a032cae0f4ca33c6053c44d83cf4acc93 ] For version 1 register dump that includes the PCIe stats, the existing code incorrectly assumes that all PCIe stats are 64-bit values. Fix it by using an array containing the starting and ending index of the 32-bit values. The loop in bnxt_get_regs() will use the array to do proper endian swap for the 32-bit values. Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'") Reviewed-by: Shruti Parab Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 799adba0034a..7daaed4520ac 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1392,6 +1392,17 @@ static int bnxt_get_regs_len(struct net_device *dev) return reg_len; } +#define BNXT_PCIE_32B_ENTRY(start, end) \ + { offsetof(struct pcie_ctx_hw_stats, start), \ + offsetof(struct pcie_ctx_hw_stats, end) } + +static const struct { + u16 start; + u16 end; +} bnxt_pcie_32b_entries[] = { + BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]), +}; + static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { @@ -1423,12 +1434,27 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); rc = hwrm_req_send(bp, req); if (!rc) { - __le64 *src = (__le64 *)hw_pcie_stats; - u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN); - int i; + u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); + u8 *src = (u8 *)hw_pcie_stats; + int i, j; - for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++) - dst[i] = le64_to_cpu(src[i]); + for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + if (i >= bnxt_pcie_32b_entries[j].start && + i <= bnxt_pcie_32b_entries[j].end) { + u32 *dst32 = (u32 *)(dst + i); + + *dst32 = le32_to_cpu(*(__le32 *)(src + i)); + i += 4; + if (i > bnxt_pcie_32b_entries[j].end && + j < ARRAY_SIZE(bnxt_pcie_32b_entries) - 1) + j++; + } else { + u64 *dst64 = (u64 *)(dst + i); + + *dst64 = le64_to_cpu(*(__le64 *)(src + i)); + i += 8; + } + } } hwrm_req_drop(bp, req); } From 44c3f30c8e206fd2ef248c40df40f5045c2f389d Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Tue, 29 Apr 2025 10:42:01 -0600 Subject: [PATCH 61/97] nvme-tcp: fix premature queue removal and I/O failover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 77e40bbce93059658aee02786a32c5c98a240a8a ] This patch addresses a data corruption issue observed in nvme-tcp during testing. In an NVMe native multipath setup, when an I/O timeout occurs, all inflight I/Os are canceled almost immediately after the kernel socket is shut down. These canceled I/Os are reported as host path errors, triggering a failover that succeeds on a different path. However, at this point, the original I/O may still be outstanding in the host's network transmission path (e.g., the NIC’s TX queue). From the user-space app's perspective, the buffer associated with the I/O is considered completed since they're acked on the different path and may be reused for new I/O requests. Because nvme-tcp enables zero-copy by default in the transmission path, this can lead to corrupted data being sent to the original target, ultimately causing data corruption. We can reproduce this data corruption by injecting delay on one path and triggering i/o timeout. To prevent this issue, this change ensures that all inflight transmissions are fully completed from host's perspective before returning from queue stop. To handle concurrent I/O timeout from multiple namespaces under the same controller, always wait in queue stop regardless of queue's state. This aligns with the behavior of queue stopping in other NVMe fabric transports. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Michael Liang Reviewed-by: Mohamed Khalfella Reviewed-by: Randy Jennings Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6dd19322c7f8..4e1b91c0416b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1686,7 +1686,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1700,6 +1700,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -1766,7 +1791,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, From a0e0efbabbbe6a1859bc31bf65237ce91e124b9b Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Tue, 29 Apr 2025 10:55:27 +0530 Subject: [PATCH 62/97] net: lan743x: Fix memleak issue when GSO enabled [ Upstream commit 2d52e2e38b85c8b7bc00dca55c2499f46f8c8198 ] Always map the `skb` to the LS descriptor. Previously skb was mapped to EXT descriptor when the number of fragments is zero with GSO enabled. Mapping the skb to EXT descriptor prevents it from being freed, leading to a memory leak Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Thangaraj Samynathan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250429052527.10031-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 8 ++++++-- drivers/net/ethernet/microchip/lan743x_main.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 0b2eaed11072..2e69ba0143b1 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1943,6 +1943,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, if (nr_frags <= 0) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_first; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); @@ -2012,6 +2013,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, tx->frame_first = 0; tx->frame_data0 = 0; tx->frame_tail = 0; + tx->frame_last = 0; return -ENOMEM; } @@ -2052,16 +2054,18 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, TX_DESC_DATA0_DTYPE_DATA_) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_tail; } - tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - buffer_info = &tx->buffer_info[tx->frame_tail]; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_last]; + buffer_info = &tx->buffer_info[tx->frame_last]; buffer_info->skb = skb; if (time_stamp) buffer_info->flags |= TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED; if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 92a5660b8820..c0d209f36188 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -974,6 +974,7 @@ struct lan743x_tx { u32 frame_first; u32 frame_data0; u32 frame_tail; + u32 frame_last; struct lan743x_tx_buffer_info *buffer_info; From 328e124da61aed5d81f683c8f03425afa27417b2 Mon Sep 17 00:00:00 2001 From: Mattias Barthel Date: Tue, 29 Apr 2025 11:08:26 +0200 Subject: [PATCH 63/97] net: fec: ERR007885 Workaround for conventional TX [ Upstream commit a179aad12badc43201cbf45d1e8ed2c1383c76b9 ] Activate TX hang workaround also in fec_enet_txq_submit_skb() when TSO is not enabled. Errata: ERR007885 Symptoms: NETDEV WATCHDOG: eth0 (fec): transmit queue 0 timed out commit 37d6017b84f7 ("net: fec: Workaround for imx6sx enet tx hang when enable three queues") There is a TDAR race condition for mutliQ when the software sets TDAR and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles). This will cause the udma_tx and udma_tx_arbiter state machines to hang. So, the Workaround is checking TDAR status four time, if TDAR cleared by hardware and then write TDAR, otherwise don't set TDAR. Fixes: 53bb20d1faba ("net: fec: add variable reg_desc_active to speed things up") Signed-off-by: Mattias Barthel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250429090826.3101258-1-mattiasbarthel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 018ce4f4be6f..4a513dba8f53 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -692,7 +692,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } From e15d00593df13e38fc5511aac952a6279aae6931 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:49 +0800 Subject: [PATCH 64/97] net: hns3: store rx VLAN tag offload state for VF [ Upstream commit ef2383d078edcbe3055032436b16cdf206f26de2 ] The VF driver missed to store the rx VLAN tag strip state when user change the rx VLAN tag offload state. And it will default to enable the rx vlan tag strip when re-init VF device after reset. So if user disable rx VLAN tag offload, and trig reset, then the HW will still strip the VLAN tag from packet nad fill into RX BD, but the VF driver will ignore it for rx VLAN tag offload disabled. It may cause the rx VLAN tag dropped. Fixes: b2641e2ad456 ("net: hns3: Add support of hardware rx-vlan-offload to HNS3 VF driver") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250430093052.2400464-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 25 ++++++++++++++----- .../hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 06493853b2b4..b11d38a6093f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1309,9 +1309,8 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) rtnl_unlock(); } -static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +static int hclgevf_en_hw_strip_rxvtag_cmd(struct hclgevf_dev *hdev, bool enable) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1320,6 +1319,19 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } +static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + + ret = hclgevf_en_hw_strip_rxvtag_cmd(hdev, enable); + if (ret) + return ret; + + hdev->rxvtag_strip_en = enable; + return 0; +} + static int hclgevf_reset_tqp(struct hnae3_handle *handle) { #define HCLGEVF_RESET_ALL_QUEUE_DONE 1U @@ -2198,12 +2210,13 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) tc_valid, tc_size); } -static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) +static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev, + bool rxvtag_strip_en) { struct hnae3_handle *nic = &hdev->nic; int ret; - ret = hclgevf_en_hw_strip_rxvtag(nic, true); + ret = hclgevf_en_hw_strip_rxvtag(nic, rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed to enable rx vlan offload, ret = %d\n", ret); @@ -2872,7 +2885,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, hdev->rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); @@ -2985,7 +2998,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; } - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, true); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 976414d00e67..1f62ac062d04 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -253,6 +253,7 @@ struct hclgevf_dev { int *vector_irq; bool gro_en; + bool rxvtag_strip_en; unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; From 9137a78d0b6063756f6140f30acd5a9f4b01374f Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 30 Apr 2025 17:30:50 +0800 Subject: [PATCH 65/97] net: hns3: fix an interrupt residual problem [ Upstream commit 8e6b9c6ea5a55045eed6526d8ee49e93192d1a58 ] When a VF is passthrough to a VM, and the VM is killed, the reported interrupt may not been handled, it will remain, and won't be clear by the nic engine even with a flr or tqp reset. When the VM restart, the interrupt of the first vector may be dropped by the second enable_irq in vfio, see the issue below: https://gitlab.com/qemu-project/qemu/-/issues/2884#note_2423361621 We notice that the vfio has always behaved this way, and the interrupt is a residue of the nic engine, so we fix the problem by moving the vector enable process out of the enable_irq loop. Fixes: 08a100689d4b ("net: hns3: re-organize vector handle") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 82 +++++++++---------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9d27fad9f35f..9bcd03e1994f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -472,20 +472,14 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, writel(mask_en, tqp_vector->mask_addr); } -static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); enable_irq(tqp_vector->vector_irq); - - /* enable vector */ - hns3_mask_vector_irq(tqp_vector, 1); } -static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_disable(struct hns3_enet_tqp_vector *tqp_vector) { - /* disable vector */ - hns3_mask_vector_irq(tqp_vector, 0); - disable_irq(tqp_vector->vector_irq); napi_disable(&tqp_vector->napi); cancel_work_sync(&tqp_vector->rx_group.dim.work); @@ -706,11 +700,42 @@ static int hns3_set_rx_cpu_rmap(struct net_device *netdev) return 0; } +static void hns3_enable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_enable(&priv->tqp_vector[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 1); + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_enable(h->kinfo.tqp[i]); +} + +static void hns3_disable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_disable(h->kinfo.tqp[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 0); + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_disable(&priv->tqp_vector[i]); +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; - int i, j; int ret; ret = hns3_nic_reset_all_ring(h); @@ -719,23 +744,13 @@ static int hns3_nic_net_up(struct net_device *netdev) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - /* enable the vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - /* enable rcb */ - for (j = 0; j < h->kinfo.num_tqps; j++) - hns3_tqp_enable(h->kinfo.tqp[j]); + hns3_enable_irqs_and_tqps(netdev); /* start the ae_dev */ ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0; if (ret) { set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); - - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + hns3_disable_irqs_and_tqps(netdev); } return ret; @@ -822,17 +837,9 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h) static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops; - int i; - /* disable vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - /* disable rcb */ - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(netdev); /* stop ae_dev */ ops = priv->ae_handle->ae_algo->ops; @@ -5869,8 +5876,6 @@ int hns3_set_channels(struct net_device *netdev, void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5881,11 +5886,7 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) netif_carrier_off(ndev); netif_tx_disable(ndev); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(ndev); /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able @@ -5901,7 +5902,6 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5917,11 +5917,7 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_enable(h->kinfo.tqp[i]); + hns3_enable_irqs_and_tqps(ndev); netif_tx_wake_all_queues(ndev); From de5ad4dbeca07b6f90af2002198ae1d58ed138c4 Mon Sep 17 00:00:00 2001 From: Hao Lan Date: Wed, 30 Apr 2025 17:30:51 +0800 Subject: [PATCH 66/97] net: hns3: fixed debugfs tm_qset size [ Upstream commit e317aebeefcb3b0c71f2305af3c22871ca6b3833 ] The size of the tm_qset file of debugfs is limited to 64 KB, which is too small in the scenario with 1280 qsets. The size needs to be expanded to 1 MB. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Hao Lan Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index d2603cfc122c..430b3ec800a9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -60,7 +60,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tm_qset", .cmd = HNAE3_DBG_CMD_TM_QSET, .dentry = HNS3_DBG_DENTRY_TM, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { From 2735065ac61b7d0364c21df88fd6d1bf857e8df0 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:52 +0800 Subject: [PATCH 67/97] net: hns3: defer calling ptp_clock_register() [ Upstream commit 4971394d9d624f91689d766f31ce668d169d9959 ] Currently the ptp_clock_register() is called before relative ptp resource ready. It may cause unexpected result when upper layer called the ptp API during the timewindow. Fix it by moving the ptp_clock_register() to the function end. Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250430093052.2400464-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 4d4cea1f5015..b7cf9fbf9718 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -452,6 +452,13 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) ptp->info.settime64 = hclge_ptp_settime; ptp->info.n_alarm = 0; + + spin_lock_init(&ptp->lock); + ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; + ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; + ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; + hdev->ptp = ptp; + ptp->clock = ptp_clock_register(&ptp->info, &hdev->pdev->dev); if (IS_ERR(ptp->clock)) { dev_err(&hdev->pdev->dev, @@ -463,12 +470,6 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) return -ENODEV; } - spin_lock_init(&ptp->lock); - ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; - ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; - ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; - hdev->ptp = ptp; - return 0; } From 53b6b3614dbbfacabd08efb19183963492da41c9 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:40 +0200 Subject: [PATCH 68/97] net: vertexcom: mse102x: Fix possible stuck of SPI interrupt [ Upstream commit 55f362885951b2d00fd7fbb02ef0227deea572c2 ] The MSE102x doesn't provide any SPI commands for interrupt handling. So in case the interrupt fired before the driver requests the IRQ, the interrupt will never fire again. In order to fix this always poll for pending packets after opening the interface. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 8f67c39f479e..45f4d2cb5b31 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -509,6 +509,7 @@ static irqreturn_t mse102x_irq(int irq, void *_mse) static int mse102x_net_open(struct net_device *ndev) { struct mse102x_net *mse = netdev_priv(ndev); + struct mse102x_net_spi *mses = to_mse102x_spi(mse); int ret; ret = request_threaded_irq(ndev->irq, NULL, mse102x_irq, IRQF_ONESHOT, @@ -524,6 +525,13 @@ static int mse102x_net_open(struct net_device *ndev) netif_carrier_on(ndev); + /* The SPI interrupt can stuck in case of pending packet(s). + * So poll for possible packet(s) to re-arm the interrupt. + */ + mutex_lock(&mses->lock); + mse102x_rx_pkt_spi(mse); + mutex_unlock(&mses->lock); + netif_dbg(mse, ifup, ndev, "network device up\n"); return 0; From 9e951979ef86a69b5ecd57b6bb91449ed336dae4 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:41 +0200 Subject: [PATCH 69/97] net: vertexcom: mse102x: Fix LEN_MASK [ Upstream commit 74987089ec678b4018dba0a609e9f4bf6ef7f4ad ] The LEN_MASK for CMD_RTS doesn't cover the whole parameter mask. The Bit 11 is reserved, so adjust LEN_MASK accordingly. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 45f4d2cb5b31..55f34d9d111c 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -33,7 +33,7 @@ #define CMD_CTR (0x2 << CMD_SHIFT) #define CMD_MASK GENMASK(15, CMD_SHIFT) -#define LEN_MASK GENMASK(CMD_SHIFT - 1, 0) +#define LEN_MASK GENMASK(CMD_SHIFT - 2, 0) #define DET_CMD_LEN 4 #define DET_SOF_LEN 2 From 7bdc90cf922e73254c76fe20e4b1f4a75de982fd Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:42 +0200 Subject: [PATCH 70/97] net: vertexcom: mse102x: Add range check for CMD_RTS [ Upstream commit d4dda902dac194e3231a1ed0f76c6c3b6340ba8a ] Since there is no protection in the SPI protocol against electrical interferences, the driver shouldn't blindly trust the length payload of CMD_RTS. So introduce a bounds check for incoming frames. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 55f34d9d111c..2b1aac72601d 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -337,8 +338,9 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) } rxlen = cmd_resp & LEN_MASK; - if (!rxlen) { - net_dbg_ratelimited("%s: No frame length defined\n", __func__); + if (rxlen < ETH_ZLEN || rxlen > VLAN_ETH_FRAME_LEN) { + net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, + rxlen); mse->stats.invalid_len++; return; } From 01b832214793b9dd7b2673249d3a78bf38f36d40 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:43 +0200 Subject: [PATCH 71/97] net: vertexcom: mse102x: Fix RX error handling [ Upstream commit ee512922ddd7d64afe2b28830a88f19063217649 ] In case the CMD_RTS got corrupted by interferences, the MSE102x doesn't allow a retransmission of the command. Instead the Ethernet frame must be shifted out of the SPI FIFO. Since the actual length is unknown, assume the maximum possible value. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-5-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/vertexcom/mse102x.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 2b1aac72601d..060a566bc6aa 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -263,7 +263,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, } static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, - unsigned int frame_len) + unsigned int frame_len, bool drop) { struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; @@ -281,6 +281,9 @@ static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, netdev_err(mse->ndev, "%s: spi_sync() failed: %d\n", __func__, ret); mse->stats.xfer_err++; + } else if (drop) { + netdev_dbg(mse->ndev, "%s: Drop frame\n", __func__); + ret = -EINVAL; } else if (*sof != cpu_to_be16(DET_SOF)) { netdev_dbg(mse->ndev, "%s: SPI start of frame is invalid (0x%04x)\n", __func__, *sof); @@ -308,6 +311,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) struct sk_buff *skb; unsigned int rxalign; unsigned int rxlen; + bool drop = false; __be16 rx = 0; u16 cmd_resp; u8 *rxpkt; @@ -330,7 +334,8 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", __func__, cmd_resp); mse->stats.invalid_rts++; - return; + drop = true; + goto drop; } net_dbg_ratelimited("%s: Unexpected response to first CMD\n", @@ -342,9 +347,16 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, rxlen); mse->stats.invalid_len++; - return; + drop = true; } + /* In case of a invalid CMD_RTS, the frame must be consumed anyway. + * So assume the maximum possible frame length. + */ +drop: + if (drop) + rxlen = VLAN_ETH_FRAME_LEN; + rxalign = ALIGN(rxlen + DET_SOF_LEN + DET_DFT_LEN, 4); skb = netdev_alloc_skb_ip_align(mse->ndev, rxalign); if (!skb) @@ -355,7 +367,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) * They are copied, but ignored. */ rxpkt = skb_put(skb, rxlen) - DET_SOF_LEN; - if (mse102x_rx_frame_spi(mse, rxpkt, rxlen)) { + if (mse102x_rx_frame_spi(mse, rxpkt, rxlen, drop)) { mse->ndev->stats.rx_errors++; dev_kfree_skb(skb); return; From 8dcd4981166aedda08410a329938b11a497c7d5d Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 22 Jun 2023 00:51:03 +0800 Subject: [PATCH 72/97] md: move initialization and destruction of 'io_acct_set' to md.c commit c567c86b90d4715081adfe5eb812141a5b6b4883 upstream. 'io_acct_set' is only used for raid0 and raid456, prepare to use it for raid1 and raid10, so that io accounting from different levels can be consistent. By the way, follow up patches will also use this io clone mechanism to make sure 'active_io' represents in flight io, not io that is dispatching, so that mddev_suspend will wait for io to be done as designed. Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230621165110.1498313-2-yukuai1@huaweicloud.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 27 ++++++++++----------------- drivers/md/md.h | 2 -- drivers/md/raid0.c | 16 ++-------------- drivers/md/raid5.c | 41 +++++++++++------------------------------ 4 files changed, 23 insertions(+), 63 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index d5fbccc72810..a9fcfcbc2d11 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5965,6 +5965,13 @@ int md_run(struct mddev *mddev) goto exit_bio_set; } + if (!bioset_initialized(&mddev->io_acct_set)) { + err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, + offsetof(struct md_io_acct, bio_clone), 0); + if (err) + goto exit_sync_set; + } + spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); if (!pers || !try_module_get(pers->owner)) { @@ -6142,6 +6149,8 @@ bitmap_abort: module_put(pers->owner); md_bitmap_destroy(mddev); abort: + bioset_exit(&mddev->io_acct_set); +exit_sync_set: bioset_exit(&mddev->sync_set); exit_bio_set: bioset_exit(&mddev->bio_set); @@ -6374,6 +6383,7 @@ static void __md_stop(struct mddev *mddev) percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); + bioset_exit(&mddev->io_acct_set); } void md_stop(struct mddev *mddev) @@ -8744,23 +8754,6 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, } EXPORT_SYMBOL_GPL(md_submit_discard_bio); -int acct_bioset_init(struct mddev *mddev) -{ - int err = 0; - - if (!bioset_initialized(&mddev->io_acct_set)) - err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, - offsetof(struct md_io_acct, bio_clone), 0); - return err; -} -EXPORT_SYMBOL_GPL(acct_bioset_init); - -void acct_bioset_exit(struct mddev *mddev) -{ - bioset_exit(&mddev->io_acct_set); -} -EXPORT_SYMBOL_GPL(acct_bioset_exit); - static void md_end_io_acct(struct bio *bio) { struct md_io_acct *md_io_acct = bio->bi_private; diff --git a/drivers/md/md.h b/drivers/md/md.h index 4f0b48097455..1fda5e139beb 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -746,8 +746,6 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_finish_reshape(struct mddev *mddev); void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio, sector_t start, sector_t size); -int acct_bioset_init(struct mddev *mddev); -void acct_bioset_exit(struct mddev *mddev); void md_account_bio(struct mddev *mddev, struct bio **bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7c6a0b4437d8..c50a7abda744 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -377,7 +377,6 @@ static void raid0_free(struct mddev *mddev, void *priv) struct r0conf *conf = priv; free_conf(mddev, conf); - acct_bioset_exit(mddev); } static int raid0_run(struct mddev *mddev) @@ -392,16 +391,11 @@ static int raid0_run(struct mddev *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; - if (acct_bioset_init(mddev)) { - pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev)); - return -ENOMEM; - } - /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { ret = create_strip_zones(mddev, &conf); if (ret < 0) - goto exit_acct_set; + return ret; mddev->private = conf; } conf = mddev->private; @@ -432,15 +426,9 @@ static int raid0_run(struct mddev *mddev) ret = md_integrity_register(mddev); if (ret) - goto free; + free_conf(mddev, conf); return ret; - -free: - free_conf(mddev, conf); -exit_acct_set: - acct_bioset_exit(mddev); - return ret; } /* diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4315dabd3202..6e80a439ec45 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7770,19 +7770,12 @@ static int raid5_run(struct mddev *mddev) struct md_rdev *rdev; struct md_rdev *journal_dev = NULL; sector_t reshape_offset = 0; - int i, ret = 0; + int i; long long min_offset_diff = 0; int first = 1; - if (acct_bioset_init(mddev)) { - pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev)); + if (mddev_init_writes_pending(mddev) < 0) return -ENOMEM; - } - - if (mddev_init_writes_pending(mddev) < 0) { - ret = -ENOMEM; - goto exit_acct_set; - } if (mddev->recovery_cp != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", @@ -7813,8 +7806,7 @@ static int raid5_run(struct mddev *mddev) (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { pr_notice("md/raid:%s: array cannot have both journal and bitmap\n", mdname(mddev)); - ret = -EINVAL; - goto exit_acct_set; + return -EINVAL; } if (mddev->reshape_position != MaxSector) { @@ -7839,15 +7831,13 @@ static int raid5_run(struct mddev *mddev) if (journal_dev) { pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", mdname(mddev)); - ret = -EINVAL; - goto exit_acct_set; + return -EINVAL; } if (mddev->new_level != mddev->level) { pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", mdname(mddev)); - ret = -EINVAL; - goto exit_acct_set; + return -EINVAL; } old_disks = mddev->raid_disks - mddev->delta_disks; /* reshape_position must be on a new-stripe boundary, and one @@ -7863,8 +7853,7 @@ static int raid5_run(struct mddev *mddev) if (sector_div(here_new, chunk_sectors * new_data_disks)) { pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n", mdname(mddev)); - ret = -EINVAL; - goto exit_acct_set; + return -EINVAL; } reshape_offset = here_new * chunk_sectors; /* here_new is the stripe we will write to */ @@ -7886,8 +7875,7 @@ static int raid5_run(struct mddev *mddev) else if (mddev->ro == 0) { pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", mdname(mddev)); - ret = -EINVAL; - goto exit_acct_set; + return -EINVAL; } } else if (mddev->reshape_backwards ? (here_new * chunk_sectors + min_offset_diff <= @@ -7897,8 +7885,7 @@ static int raid5_run(struct mddev *mddev) /* Reading from the same stripe as writing to - bad */ pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", mdname(mddev)); - ret = -EINVAL; - goto exit_acct_set; + return -EINVAL; } pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev)); /* OK, we should be able to continue; */ @@ -7922,10 +7909,8 @@ static int raid5_run(struct mddev *mddev) else conf = mddev->private; - if (IS_ERR(conf)) { - ret = PTR_ERR(conf); - goto exit_acct_set; - } + if (IS_ERR(conf)) + return PTR_ERR(conf); if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { if (!journal_dev) { @@ -8125,10 +8110,7 @@ abort: free_conf(conf); mddev->private = NULL; pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); - ret = -EIO; -exit_acct_set: - acct_bioset_exit(mddev); - return ret; + return -EIO; } static void raid5_free(struct mddev *mddev, void *priv) @@ -8136,7 +8118,6 @@ static void raid5_free(struct mddev *mddev, void *priv) struct r5conf *conf = priv; free_conf(conf); - acct_bioset_exit(mddev); mddev->to_remove = &raid5_attrs_group; } From 85bb073b8941673aec6f7dce47c521c7f1988944 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Tue, 26 Nov 2024 15:56:56 +0800 Subject: [PATCH 73/97] PCI: imx6: Skip controller_id generation logic for i.MX7D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f068ffdd034c93f0c768acdc87d4d2d7023c1379 upstream. The i.MX7D only has one PCIe controller, so controller_id should always be 0. The previous code is incorrect although yielding the correct result. Fix by removing "IMX7D" from the switch case branch. Fixes: 2d8ed461dbc9 ("PCI: imx6: Add support for i.MX8MQ") Link: https://lore.kernel.org/r/20241126075702.4099164-5-hongxing.zhu@nxp.com Signed-off-by: Richard Zhu Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Reviewed-by: Frank Li [Because this switch case does more than just controller_id logic, move the "IMX7D" case label instead of removing it entirely.] Signed-off-by: Ryan Matthews Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-imx6.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index a3acf144a40d..4f20094faee5 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1172,11 +1172,10 @@ static int imx6_pcie_probe(struct platform_device *pdev) if (IS_ERR(imx6_pcie->pcie_aux)) return dev_err_probe(dev, PTR_ERR(imx6_pcie->pcie_aux), "pcie_aux clock source missing or invalid\n"); - fallthrough; - case IMX7D: if (dbi_base->start == IMX8MQ_PCIE2_BASE_ADDR) imx6_pcie->controller_id = 1; - + fallthrough; + case IMX7D: imx6_pcie->pciephy_reset = devm_reset_control_get_exclusive(dev, "pciephy"); if (IS_ERR(imx6_pcie->pciephy_reset)) { From 73cf6af13153d62f9b76eff422eea79dbc70f15e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:23 -0700 Subject: [PATCH 74/97] sch_htb: make htb_qlen_notify() idempotent commit 5ba8b837b522d7051ef81bacf3d95383ff8edce5 upstream. htb_qlen_notify() always deactivates the HTB class and in fact could trigger a warning if it is already deactivated. Therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-2-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_htb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d23f8ea63082..fb0fb8825574 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1482,6 +1482,8 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; + if (!cl->prio_activity) + return; htb_deactivate(qdisc_priv(sch), cl); } From 0e59ce6cf3c6e24433a47e753b033d71ba0ae42e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:24 -0700 Subject: [PATCH 75/97] sch_drr: make drr_qlen_notify() idempotent commit df008598b3a00be02a8051fde89ca0fbc416bd55 upstream. drr_qlen_notify() always deletes the DRR class from its active list with list_del(), therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-3-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_drr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index b35d6086a972..3aee041c7bdb 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -111,6 +111,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOBUFS; gnet_stats_basic_sync_init(&cl->bstats); + INIT_LIST_HEAD(&cl->alist); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -233,7 +234,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - list_del(&cl->alist); + list_del_init(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -392,7 +393,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) if (unlikely(skb == NULL)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); @@ -433,7 +434,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); qdisc_reset(cl->qdisc); } } From 0475c85426b18eccdcb7f9fb58d8f8e9c6c58c87 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:25 -0700 Subject: [PATCH 76/97] sch_hfsc: make hfsc_qlen_notify() idempotent commit 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb upstream. hfsc_qlen_notify() is not idempotent either and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life: 1. update_vf() decreases cl->cl_nactive, so we can check whether it is non-zero before calling it. 2. eltree_remove() always removes RB node cl->el_node, but we can use RB_EMPTY_NODE() + RB_CLEAR_NODE() to make it safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-4-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_hfsc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 4b9f323845b9..fc1370c29373 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -204,7 +204,10 @@ eltree_insert(struct hfsc_class *cl) static inline void eltree_remove(struct hfsc_class *cl) { - rb_erase(&cl->el_node, &cl->sched->eligible); + if (!RB_EMPTY_NODE(&cl->el_node)) { + rb_erase(&cl->el_node, &cl->sched->eligible); + RB_CLEAR_NODE(&cl->el_node); + } } static inline void @@ -1222,7 +1225,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. */ - update_vf(cl, 0, 0); + if (cl->cl_nactive) + update_vf(cl, 0, 0); if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); } From 83fb922d061f773524eeecb5926a9ee503aaeb5c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:26 -0700 Subject: [PATCH 77/97] sch_qfq: make qfq_qlen_notify() idempotent commit 55f9eca4bfe30a15d8656f915922e8c98b7f0728 upstream. qfq_qlen_notify() always deletes its class from its active list with list_del_init() _and_ calls qfq_deactivate_agg() when the whole list becomes empty. To make it idempotent, just skip everything when it is not in the active list. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-5-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index e6743e17408b..6462468bf77c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -354,7 +354,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) struct qfq_aggregate *agg = cl->agg; - list_del(&cl->alist); /* remove from RR queue of the aggregate */ + list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } @@ -483,6 +483,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; + INIT_LIST_HEAD(&cl->alist); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); @@ -989,7 +990,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ - list_del(&cl->alist); + list_del_init(&cl->alist); else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); @@ -1420,6 +1421,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; + if (list_empty(&cl->alist)) + return; qfq_deactivate_class(q, cl); } From be9e23028113446add10a9b66cf8f15c66a6257f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:27 -0700 Subject: [PATCH 78/97] sch_ets: make est_qlen_notify() idempotent commit a7a15f39c682ac4268624da2abdb9114bdde96d5 upstream. est_qlen_notify() deletes its class from its active list with list_del() when qlen is 0, therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250403211033.166059-6-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_ets.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 18269f86d21e..9da86db4d2c2 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -298,7 +298,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg) * to remove them. */ if (!ets_class_is_strict(q, cl) && sch->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); } static int ets_class_dump(struct Qdisc *sch, unsigned long arg, @@ -491,7 +491,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch) if (unlikely(!skb)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); return ets_qdisc_dequeue_skb(sch, skb); } @@ -660,7 +660,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del(&q->classes[i].alist); + list_del_init(&q->classes[i].alist); qdisc_tree_flush_backlog(q->classes[i].qdisc); } q->nstrict = nstrict; @@ -716,7 +716,7 @@ static void ets_qdisc_reset(struct Qdisc *sch) for (band = q->nstrict; band < q->nbands; band++) { if (q->classes[band].qdisc->q.qlen) - list_del(&q->classes[band].alist); + list_del_init(&q->classes[band].alist); } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); From c13b801e8597419b0fe8a40a13b16936435958f6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 May 2025 16:06:48 +0200 Subject: [PATCH 79/97] Revert "x86/kexec: Allocate PGD for x86_64 transition page tables separately" This reverts commit 6821918f451942aa79759f29677a22f2d4ff4cbe which is commit 4b5bc2ec9a239bce261ffeafdd63571134102323 upstream. The patch it relies on is not in the 6.1.y tree, and has been reported to cause problems, so let's revert it for now. Reported-by: Eric Hagberg Link: https://lore.kernel.org/r/CAAH4uRBxJ_XvYjCpgYXHqrKSNj6x9pA7X6NBPNTekeQ90DQSJA@mail.gmail.com Cc: David Woodhouse Cc: Ingo Molnar Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Young Cc: Eric Biederman Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kexec.h | 18 +++--------- arch/x86/kernel/machine_kexec_64.c | 45 ++++++++++++++---------------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index e2e1ec99c999..256eee99afc8 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -16,7 +16,6 @@ # define PAGES_NR 4 #endif -# define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 #ifndef __ASSEMBLY__ @@ -45,6 +44,7 @@ struct kimage; /* Maximum address we can use for the control code buffer */ # define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE +# define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_386 @@ -59,6 +59,9 @@ struct kimage; /* Maximum address we can use for the control pages */ # define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1) +/* Allocate one page for the pdp and the second for the code */ +# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) + /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_X86_64 #endif @@ -143,19 +146,6 @@ struct kimage_arch { }; #else struct kimage_arch { - /* - * This is a kimage control page, as it must not overlap with either - * source or destination address ranges. - */ - pgd_t *pgd; - /* - * The virtual mapping of the control code page itself is used only - * during the transition, while the current kernel's pages are all - * in place. Thus the intermediate page table pages used to map it - * are not control pages, but instead just normal pages obtained - * with get_zeroed_page(). And have to be tracked (below) so that - * they can be freed. - */ p4d_t *p4d; pud_t *pud; pmd_t *pmd; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 5d61a342871b..24b6eaacc81e 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -149,8 +149,7 @@ static void free_transition_pgtable(struct kimage *image) image->arch.pte = NULL; } -static int init_transition_pgtable(struct kimage *image, pgd_t *pgd, - unsigned long control_page) +static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) { pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; unsigned long vaddr, paddr; @@ -161,7 +160,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd, pte_t *pte; vaddr = (unsigned long)relocate_kernel; - paddr = control_page; + paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); pgd += pgd_index(vaddr); if (!pgd_present(*pgd)) { p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); @@ -220,7 +219,7 @@ static void *alloc_pgt_page(void *data) return p; } -static int init_pgtable(struct kimage *image, unsigned long control_page) +static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { struct x86_mapping_info info = { .alloc_pgt_page = alloc_pgt_page, @@ -229,12 +228,12 @@ static int init_pgtable(struct kimage *image, unsigned long control_page) .kernpg_flag = _KERNPG_TABLE_NOENC, }; unsigned long mstart, mend; + pgd_t *level4p; int result; int i; - image->arch.pgd = alloc_pgt_page(image); - if (!image->arch.pgd) - return -ENOMEM; + level4p = (pgd_t *)__va(start_pgtable); + clear_page(level4p); if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { info.page_flag |= _PAGE_ENC; @@ -248,8 +247,8 @@ static int init_pgtable(struct kimage *image, unsigned long control_page) mstart = pfn_mapped[i].start << PAGE_SHIFT; mend = pfn_mapped[i].end << PAGE_SHIFT; - result = kernel_ident_mapping_init(&info, image->arch.pgd, - mstart, mend); + result = kernel_ident_mapping_init(&info, + level4p, mstart, mend); if (result) return result; } @@ -264,8 +263,8 @@ static int init_pgtable(struct kimage *image, unsigned long control_page) mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; - result = kernel_ident_mapping_init(&info, image->arch.pgd, - mstart, mend); + result = kernel_ident_mapping_init(&info, + level4p, mstart, mend); if (result) return result; @@ -275,19 +274,15 @@ static int init_pgtable(struct kimage *image, unsigned long control_page) * Prepare EFI systab and ACPI tables for kexec kernel since they are * not covered by pfn_mapped. */ - result = map_efi_systab(&info, image->arch.pgd); + result = map_efi_systab(&info, level4p); if (result) return result; - result = map_acpi_tables(&info, image->arch.pgd); + result = map_acpi_tables(&info, level4p); if (result) return result; - /* - * This must be last because the intermediate page table pages it - * allocates will not be control pages and may overlap the image. - */ - return init_transition_pgtable(image, image->arch.pgd, control_page); + return init_transition_pgtable(image, level4p); } static void load_segments(void) @@ -304,14 +299,14 @@ static void load_segments(void) int machine_kexec_prepare(struct kimage *image) { - unsigned long control_page; + unsigned long start_pgtable; int result; /* Calculate the offsets */ - control_page = page_to_pfn(image->control_code_page) << PAGE_SHIFT; + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; /* Setup the identity mapped 64bit page table */ - result = init_pgtable(image, control_page); + result = init_pgtable(image, start_pgtable); if (result) return result; @@ -358,12 +353,13 @@ void machine_kexec(struct kimage *image) #endif } - control_page = page_address(image->control_code_page); + control_page = page_address(image->control_code_page) + PAGE_SIZE; __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; - page_list[PA_TABLE_PAGE] = (unsigned long)__pa(image->arch.pgd); + page_list[PA_TABLE_PAGE] = + (unsigned long)__pa(page_address(image->control_code_page)); if (image->type == KEXEC_TYPE_DEFAULT) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) @@ -582,7 +578,8 @@ static void kexec_mark_crashkres(bool protect) /* Don't touch the control code page used in crash_kexec().*/ control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page)); - kexec_mark_range(crashk_res.start, control - 1, protect); + /* Control code page is located in the 2nd page. */ + kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect); control += KEXEC_CONTROL_PAGE_SIZE; kexec_mark_range(control, crashk_res.end, protect); } From ff4273d47da81b95ed9396110bcbd1b7b7470fe8 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 6 Mar 2025 18:54:47 +0000 Subject: [PATCH 80/97] firmware: arm_scmi: Balance device refcount when destroying devices [ Upstream commit 9ca67840c0ddf3f39407339624cef824a4f27599 ] Using device_find_child() to lookup the proper SCMI device to destroy causes an unbalance in device refcount, since device_find_child() calls an implicit get_device(): this, in turns, inhibits the call of the provided release methods upon devices destruction. As a consequence, one of the structures that is not freed properly upon destruction is the internal struct device_private dev->p populated by the drivers subsystem core. KMemleak detects this situation since loading/unloding some SCMI driver causes related devices to be created/destroyed without calling any device_release method. unreferenced object 0xffff00000f583800 (size 512): comm "insmod", pid 227, jiffies 4294912190 hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 60 36 1d 8a 00 80 ff ff ........`6...... backtrace (crc 114e2eed): kmemleak_alloc+0xbc/0xd8 __kmalloc_cache_noprof+0x2dc/0x398 device_add+0x954/0x12d0 device_register+0x28/0x40 __scmi_device_create.part.0+0x1bc/0x380 scmi_device_create+0x2d0/0x390 scmi_create_protocol_devices+0x74/0xf8 scmi_device_request_notifier+0x1f8/0x2a8 notifier_call_chain+0x110/0x3b0 blocking_notifier_call_chain+0x70/0xb0 scmi_driver_register+0x350/0x7f0 0xffff80000a3b3038 do_one_initcall+0x12c/0x730 do_init_module+0x1dc/0x640 load_module+0x4b20/0x5b70 init_module_from_file+0xec/0x158 $ ./scripts/faddr2line ./vmlinux device_add+0x954/0x12d0 device_add+0x954/0x12d0: kmalloc_noprof at include/linux/slab.h:901 (inlined by) kzalloc_noprof at include/linux/slab.h:1037 (inlined by) device_private_init at drivers/base/core.c:3510 (inlined by) device_add at drivers/base/core.c:3561 Balance device refcount by issuing a put_device() on devices found via device_find_child(). Reported-by: Alice Ryhl Closes: https://lore.kernel.org/linux-arm-kernel/Z8nK3uFkspy61yjP@arm.com/T/#mc1f73a0ea5e41014fa145147b7b839fc988ada8f CC: Sudeep Holla CC: Catalin Marinas Fixes: d4f9dddd21f3 ("firmware: arm_scmi: Add dynamic scmi devices creation") Signed-off-by: Cristian Marussi Tested-by: Alice Ryhl Message-Id: <20250306185447.2039336-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 35bb70724d44..8c6f99d15f22 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -73,6 +73,9 @@ struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } From 88d7fd2d4623b2cb13d056e1bde1861e4dec2408 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Mar 2025 11:57:00 +0000 Subject: [PATCH 81/97] firmware: arm_ffa: Skip Rx buffer ownership release if not acquired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4567bdaaaaa1744da3d7da07d9aca2f941f5b4e5 ] Completion of the FFA_PARTITION_INFO_GET ABI transfers the ownership of the caller’s Rx buffer from the producer(typically partition mnager) to the consumer(this driver/OS). FFA_RX_RELEASE transfers the ownership from the consumer back to the producer. However, when we set the flag to just return the count of partitions deployed in the system corresponding to the specified UUID while invoking FFA_PARTITION_INFO_GET, the Rx buffer ownership shouldn't be transferred to this driver. We must be able to skip transferring back the ownership to the partition manager when we request just to get the count of the partitions as the buffers are not acquired in this case. Firmware may return FFA_RET_DENIED or other error for the ffa_rx_release() in such cases. Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support") Message-Id: <20250321115700.3525197-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_ffa/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index e9f86b757301..e1e278d431e9 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -306,7 +306,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, memcpy(buffer + idx, drv_info->rx_buffer + idx * sz, buf_sz); - ffa_rx_release(); + if (!(flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)) + ffa_rx_release(); mutex_unlock(&drv_info->rx_lock); From 72a797facb50aeef98a9d56b6b49674dbf53f692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 14 Mar 2025 17:20:38 +0100 Subject: [PATCH 82/97] ARM: dts: opos6ul: add ksz8081 phy properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6e1a7bc8382b0d4208258f7d2a4474fae788dd90 ] Commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") removed a PHY fixup that setted the clock mode and the LED mode. Make the Ethernet interface work again by doing as advised in the commit's log, set clock mode and the LED mode in the device tree. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Sébastien Szymanski Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2..dda4fa91b2f2 100644 --- a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; From 9b89102fbb8fc5393e2a0f981aafdb3cf43591ee Mon Sep 17 00:00:00 2001 From: Fiona Klute Date: Wed, 16 Apr 2025 12:24:13 +0200 Subject: [PATCH 83/97] net: phy: microchip: force IRQ polling mode for lan88xx [ Upstream commit 30a41ed32d3088cd0d682a13d7f30b23baed7e93 ] With lan88xx based devices the lan78xx driver can get stuck in an interrupt loop while bringing the device up, flooding the kernel log with messages like the following: lan78xx 2-3:1.0 enp1s0u3: kevent 4 may have been dropped Removing interrupt support from the lan88xx PHY driver forces the driver to use polling instead, which avoids the problem. The issue has been observed with Raspberry Pi devices at least since 4.14 (see [1], bug report for their downstream kernel), as well as with Nvidia devices [2] in 2020, where disabling interrupts was the vendor-suggested workaround (together with the claim that phylib changes in 4.9 made the interrupt handling in lan78xx incompatible). Iperf reports well over 900Mbits/sec per direction with client in --dualtest mode, so there does not seem to be a significant impact on throughput (lan88xx device connected via switch to the peer). [1] https://github.com/raspberrypi/linux/issues/2447 [2] https://forums.developer.nvidia.com/t/jetson-xavier-and-lan7800-problem/142134/11 Link: https://lore.kernel.org/0901d90d-3f20-4a10-b680-9c978e04ddda@lunn.ch Fixes: 792aec47d59d ("add microchip LAN88xx phy driver") Signed-off-by: Fiona Klute Cc: kernel-list@raspberrypi.com Cc: stable@vger.kernel.org Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250416102413.30654-1-fiona.klute@gmx.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/microchip.c | 46 +++---------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 0b88635f4fbc..623607fd2cef 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -31,47 +31,6 @@ static int lan88xx_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, page); } -static int lan88xx_phy_config_intr(struct phy_device *phydev) -{ - int rc; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - /* unmask all source and clear them before enable */ - rc = phy_write(phydev, LAN88XX_INT_MASK, 0x7FFF); - rc = phy_read(phydev, LAN88XX_INT_STS); - rc = phy_write(phydev, LAN88XX_INT_MASK, - LAN88XX_INT_MASK_MDINTPIN_EN_ | - LAN88XX_INT_MASK_LINK_CHANGE_); - } else { - rc = phy_write(phydev, LAN88XX_INT_MASK, 0); - if (rc) - return rc; - - /* Ack interrupts after they have been disabled */ - rc = phy_read(phydev, LAN88XX_INT_STS); - } - - return rc < 0 ? rc : 0; -} - -static irqreturn_t lan88xx_handle_interrupt(struct phy_device *phydev) -{ - int irq_status; - - irq_status = phy_read(phydev, LAN88XX_INT_STS); - if (irq_status < 0) { - phy_error(phydev); - return IRQ_NONE; - } - - if (!(irq_status & LAN88XX_INT_STS_LINK_CHANGE_)) - return IRQ_NONE; - - phy_trigger_machine(phydev); - - return IRQ_HANDLED; -} - static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; @@ -392,8 +351,9 @@ static struct phy_driver microchip_phy_driver[] = { .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, - .config_intr = lan88xx_phy_config_intr, - .handle_interrupt = lan88xx_handle_interrupt, + /* Interrupt handling is broken, do not define related + * functions to force polling. + */ .suspend = lan88xx_suspend, .resume = genphy_resume, From 36d4ce271b97d7d23a67e690b79e04ea853325b1 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Mon, 21 Apr 2025 22:12:59 +0200 Subject: [PATCH 84/97] Revert "drm/meson: vclk: fix calculation of 59.94 fractional rates" [ Upstream commit f37bb5486ea536c1d61df89feeaeff3f84f0b560 ] This reverts commit bfbc68e. The patch does permit the offending YUV420 @ 59.94 phy_freq and vclk_freq mode to match in calculations. It also results in all fractional rates being unavailable for use. This was unintended and requires the patch to be reverted. Fixes: bfbc68e4d869 ("drm/meson: vclk: fix calculation of 59.94 fractional rates") Cc: stable@vger.kernel.org Signed-off-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc..2a82119eb58e 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,13 +790,13 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, FREQ_1000_1001(params[i].pixel_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); + FREQ_1000_1001(params[i].phy_freq/10)*10); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && + if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -1070,7 +1070,7 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && + phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && (vclk_freq == params[freq].vclk_freq || vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) From c8f809b3de3553323e099f4a051d06da03ad316f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Nov 2022 15:39:33 +0100 Subject: [PATCH 85/97] irqchip/gic-v2m: Mark a few functions __init [ Upstream commit d51a15af37ce8cf59e73de51dcdce3c9f4944974 ] They are all part of the init sequence. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221121140048.534395323@linutronix.de Stable-dep-of: 3318dc299b07 ("irqchip/gic-v2m: Prevent use after free of gicv2m_get_fwnode()") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v2m.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 414cd925064f..c2e52c9a3854 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -262,7 +262,7 @@ static struct msi_domain_info gicv2m_pmsi_domain_info = { .chip = &gicv2m_pmsi_irq_chip, }; -static void gicv2m_teardown(void) +static void __init gicv2m_teardown(void) { struct v2m_data *v2m, *tmp; @@ -277,7 +277,7 @@ static void gicv2m_teardown(void) } } -static int gicv2m_allocate_domains(struct irq_domain *parent) +static __init int gicv2m_allocate_domains(struct irq_domain *parent) { struct irq_domain *inner_domain, *pci_domain, *plat_domain; struct v2m_data *v2m; @@ -404,7 +404,7 @@ err_free_v2m: return ret; } -static const struct of_device_id gicv2m_device_id[] = { +static __initconst struct of_device_id gicv2m_device_id[] = { { .compatible = "arm,gic-v2m-frame", }, {}, }; @@ -454,7 +454,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; @@ -469,7 +469,7 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) return data->fwnode; } -static bool acpi_check_amazon_graviton_quirks(void) +static __init bool acpi_check_amazon_graviton_quirks(void) { static struct acpi_table_madt *madt; acpi_status status; From dc0d654eb4179b06d3206e4396d072108b9ba082 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Apr 2025 17:16:16 +0100 Subject: [PATCH 86/97] irqchip/gic-v2m: Prevent use after free of gicv2m_get_fwnode() [ Upstream commit 3318dc299b072a0511d6dfd8367f3304fb6d9827 ] With ACPI in place, gicv2m_get_fwnode() is registered with the pci subsystem as pci_msi_get_fwnode_cb(), which may get invoked at runtime during a PCI host bridge probe. But, the call back is wrongly marked as __init, causing it to be freed, while being registered with the PCI subsystem and could trigger: Unable to handle kernel paging request at virtual address ffff8000816c0400 gicv2m_get_fwnode+0x0/0x58 (P) pci_set_bus_msi_domain+0x74/0x88 pci_register_host_bridge+0x194/0x548 This is easily reproducible on a Juno board with ACPI boot. Retain the function for later use. Fixes: 0644b3daca28 ("irqchip/gic-v2m: acpi: Introducing GICv2m ACPI support") Signed-off-by: Suzuki K Poulose Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c2e52c9a3854..c04f2481068b 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -454,7 +454,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; From 4c4f168b46229d527bda801ef15ad793b069f0ae Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 6 May 2025 11:31:50 +0000 Subject: [PATCH 87/97] dm: fix copying after src array boundaries commit f1aff4bc199cb92c055668caed65505e3b4d2656 upstream. The blammed commit copied to argv the size of the reallocated argv, instead of the size of the old_argv, thus reading and copying from past the old_argv allocated memory. Following BUG_ON was hit: [ 3.038929][ T1] kernel BUG at lib/string_helpers.c:1040! [ 3.039147][ T1] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... [ 3.056489][ T1] Call trace: [ 3.056591][ T1] __fortify_panic+0x10/0x18 (P) [ 3.056773][ T1] dm_split_args+0x20c/0x210 [ 3.056942][ T1] dm_table_add_target+0x13c/0x360 [ 3.057132][ T1] table_load+0x110/0x3ac [ 3.057292][ T1] dm_ctl_ioctl+0x424/0x56c [ 3.057457][ T1] __arm64_sys_ioctl+0xa8/0xec [ 3.057634][ T1] invoke_syscall+0x58/0x10c [ 3.057804][ T1] el0_svc_common+0xa8/0xdc [ 3.057970][ T1] do_el0_svc+0x1c/0x28 [ 3.058123][ T1] el0_svc+0x50/0xac [ 3.058266][ T1] el0t_64_sync_handler+0x60/0xc4 [ 3.058452][ T1] el0t_64_sync+0x1b0/0x1b4 [ 3.058620][ T1] Code: f800865e a9bf7bfd 910003fd 941f48aa (d4210000) [ 3.058897][ T1] ---[ end trace 0000000000000000 ]--- [ 3.059083][ T1] Kernel panic - not syncing: Oops - BUG: Fatal exception Fix it by copying the size of src, and not the size of dst, as it was. Fixes: 5a2a6c428190 ("dm: always update the array size in realloc_argv on success") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 052ef081fcdf..492a3b38f552 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -503,9 +503,9 @@ static char **realloc_argv(unsigned int *size, char **old_argv) } argv = kmalloc_array(new_size, sizeof(*argv), gfp); if (argv) { - *size = new_size; if (old_argv) memcpy(argv, old_argv, *size * sizeof(*argv)); + *size = new_size; } kfree(old_argv); From 28ddd9cf9d581d3292f49e6d8fad102ee1762857 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Aug 2024 20:31:15 -0300 Subject: [PATCH 88/97] iommu/arm-smmu-v3: Use the new rb tree helpers [ Upstream commit a2bb820e862d61f9ca1499e500915f9f505a2655 ] Since v5.12 the rbtree has gained some simplifying helpers aimed at making rb tree users write less convoluted boiler plate code. Instead the caller provides a single comparison function and the helpers generate the prior open-coded stuff. Update smmu->streams to use rb_find_add() and rb_find(). Tested-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v3-9fef8cdc2ff6+150d1-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon Stable-dep-of: b00d24997a11 ("iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids") Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 68 ++++++++++----------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 96b72f3dad0d..1ab2abab4680 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1443,26 +1443,37 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } +static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs) +{ + struct arm_smmu_stream *stream_rhs = + rb_entry(rhs, struct arm_smmu_stream, node); + const u32 *sid_lhs = lhs; + + if (*sid_lhs < stream_rhs->id) + return -1; + if (*sid_lhs > stream_rhs->id) + return 1; + return 0; +} + +static int arm_smmu_streams_cmp_node(struct rb_node *lhs, + const struct rb_node *rhs) +{ + return arm_smmu_streams_cmp_key( + &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs); +} + static struct arm_smmu_master * arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) { struct rb_node *node; - struct arm_smmu_stream *stream; lockdep_assert_held(&smmu->streams_mutex); - node = smmu->streams.rb_node; - while (node) { - stream = rb_entry(node, struct arm_smmu_stream, node); - if (stream->id < sid) - node = node->rb_right; - else if (stream->id > sid) - node = node->rb_left; - else - return stream->master; - } - - return NULL; + node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key); + if (!node) + return NULL; + return rb_entry(node, struct arm_smmu_stream, node)->master; } /* IRQ and event handlers */ @@ -2590,8 +2601,6 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, { int i; int ret = 0; - struct arm_smmu_stream *new_stream, *cur_stream; - struct rb_node **new_node, *parent_node = NULL; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams), @@ -2602,9 +2611,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { + struct arm_smmu_stream *new_stream = &master->streams[i]; u32 sid = fwspec->ids[i]; - new_stream = &master->streams[i]; new_stream->id = sid; new_stream->master = master; @@ -2613,28 +2622,13 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - new_node = &(smmu->streams.rb_node); - while (*new_node) { - cur_stream = rb_entry(*new_node, struct arm_smmu_stream, - node); - parent_node = *new_node; - if (cur_stream->id > new_stream->id) { - new_node = &((*new_node)->rb_left); - } else if (cur_stream->id < new_stream->id) { - new_node = &((*new_node)->rb_right); - } else { - dev_warn(master->dev, - "stream %u already in tree\n", - cur_stream->id); - ret = -EINVAL; - break; - } - } - if (ret) + if (rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node)) { + dev_warn(master->dev, "stream %u already in tree\n", + sid); + ret = -EINVAL; break; - - rb_link_node(&new_stream->node, parent_node, new_node); - rb_insert_color(&new_stream->node, &smmu->streams); + } } if (ret) { From 8316820e4082052666872398a31c91668d1f8461 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 15 Apr 2025 11:56:20 -0700 Subject: [PATCH 89/97] iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids [ Upstream commit b00d24997a11c10d3e420614f0873b83ce358a34 ] ASPEED VGA card has two built-in devices: 0008:06:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge (rev 06) 0008:07:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED Graphics Family (rev 52) Its toplogy looks like this: +-[0008:00]---00.0-[01-09]--+-00.0-[02-09]--+-00.0-[03]----00.0 Sandisk Corp Device 5017 | +-01.0-[04]-- | +-02.0-[05]----00.0 NVIDIA Corporation Device | +-03.0-[06-07]----00.0-[07]----00.0 ASPEED Technology, Inc. ASPEED Graphics Family | +-04.0-[08]----00.0 Renesas Technology Corp. uPD720201 USB 3.0 Host Controller | \-05.0-[09]----00.0 Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller \-00.1 PMC-Sierra Inc. Device 4028 The IORT logic populaties two identical IDs into the fwspec->ids array via DMA aliasing in iort_pci_iommu_init() called by pci_for_each_dma_alias(). Though the SMMU driver had been able to handle this situation since commit 563b5cbe334e ("iommu/arm-smmu-v3: Cope with duplicated Stream IDs"), that got broken by the later commit cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure"), which ended up with allocating separate streams with the same stuffing. On a kernel prior to v6.15-rc1, there has been an overlooked warning: pci 0008:07:00.0: vgaarb: setting as boot VGA device pci 0008:07:00.0: vgaarb: bridge control possible pci 0008:07:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none pcieport 0008:06:00.0: Adding to iommu group 14 ast 0008:07:00.0: stream 67328 already in tree <===== WARNING ast 0008:07:00.0: enabling device (0002 -> 0003) ast 0008:07:00.0: Using default configuration ast 0008:07:00.0: AST 2600 detected ast 0008:07:00.0: [drm] Using analog VGA ast 0008:07:00.0: [drm] dram MCLK=396 Mhz type=1 bus_width=16 [drm] Initialized ast 0.1.0 for 0008:07:00.0 on minor 0 ast 0008:07:00.0: [drm] fb0: astdrmfb frame buffer device With v6.15-rc, since the commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path"), the error returned with the warning is moved to the SMMU device probe flow: arm_smmu_probe_device+0x15c/0x4c0 __iommu_probe_device+0x150/0x4f8 probe_iommu_group+0x44/0x80 bus_for_each_dev+0x7c/0x100 bus_iommu_probe+0x48/0x1a8 iommu_device_register+0xb8/0x178 arm_smmu_device_probe+0x1350/0x1db0 which then fails the entire SMMU driver probe: pci 0008:06:00.0: Adding to iommu group 21 pci 0008:07:00.0: stream 67328 already in tree arm-smmu-v3 arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3 arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 Since SMMU driver had been already expecting a potential duplicated Stream ID in arm_smmu_install_ste_for_dev(), change the arm_smmu_insert_master() routine to ignore a duplicated ID from the fwspec->sids array as well. Note: this has been failing the iommu_device_probe() since 2021, although a recent iommu commit in v6.15-rc1 that moves iommu_device_probe() started to fail the SMMU driver probe. Since nobody has cared about DMA Alias support, leave that as it was but fix the fundamental iommu_device_probe() breakage. Fixes: cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250415185620.504299-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1ab2abab4680..6a60bad48b27 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2612,6 +2612,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -2622,10 +2623,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "stream %u already in tree from dev %s\n", sid, + dev_name(existing_master->dev)); ret = -EINVAL; break; } From 8a86bb891bf89b235623eef32f8aaef2a8ee28b3 Mon Sep 17 00:00:00 2001 From: hersen wu Date: Mon, 14 Nov 2022 14:29:56 -0500 Subject: [PATCH 90/97] drm/amd/display: phase2 enable mst hdcp multiple displays [ Upstream commit aa9fdd5d5add50305d2022fa072fe6f189283415 ] [why] For MST topology with 1 physical link and multiple connectors (>=2), e.g. daisy cahined MST + SST, or 1-to-multi MST hub, if userspace set to enable the HDCP simultaneously on all connected outputs, the commit tail iteratively call the hdcp_update_display() for each display (connector). However, the hdcp workqueue data structure for each link has only one DM connector and encryption status members, which means the work queue of property_validate/update() would only be triggered for the last connector within this physical link, and therefore the HDCP property value of other connectors would stay on DESIRED instead of switching to ENABLED, which is NOT as expected. [how] Use array of AMDGPU_DM_MAX_DISPLAY_INDEX for both aconnector and encryption status in hdcp workqueue data structure for each physical link. For property validate/update work queue, we iterates over the array and do similar operation/check for each connected display. Tested-by: Daniel Wheeler Signed-off-by: hersen wu Signed-off-by: Alex Deucher Stable-dep-of: be593d9d91c5 ("drm/amd/display: Fix slab-use-after-free in hdcp") Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 170 +++++++++++++----- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.h | 5 +- 2 files changed, 127 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 3f211c0308a2..7fc26ca30dcd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -170,9 +170,10 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct mod_hdcp_display_query query; + unsigned int conn_index = aconnector->base.index; mutex_lock(&hdcp_w->mutex); - hdcp_w->aconnector = aconnector; + hdcp_w->aconnector[conn_index] = aconnector; query.display = NULL; mod_hdcp_query_display(&hdcp_w->hdcp, aconnector->base.index, &query); @@ -204,7 +205,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS)); } else { display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; - hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; cancel_delayed_work(&hdcp_w->property_validate_dwork); } @@ -223,9 +224,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; struct drm_connector_state *conn_state = aconnector->base.state; + unsigned int conn_index = aconnector->base.index; mutex_lock(&hdcp_w->mutex); - hdcp_w->aconnector = aconnector; + hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -247,13 +249,18 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; + unsigned int conn_index; mutex_lock(&hdcp_w->mutex); mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output); cancel_delayed_work(&hdcp_w->property_validate_dwork); - hdcp_w->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + + for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { + hdcp_w->encryption_status[conn_index] = + MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + } process_output(hdcp_w); @@ -290,49 +297,83 @@ static void event_callback(struct work_struct *work) } + static void event_property_update(struct work_struct *work) { - struct hdcp_workqueue *hdcp_work = container_of(work, struct hdcp_workqueue, property_update_work); - struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector; - struct drm_device *dev = hdcp_work->aconnector->base.dev; + struct amdgpu_dm_connector *aconnector = NULL; + struct drm_device *dev; long ret; + unsigned int conn_index; + struct drm_connector *connector; + struct drm_connector_state *conn_state; - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - mutex_lock(&hdcp_work->mutex); + for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { + aconnector = hdcp_work->aconnector[conn_index]; + if (!aconnector) + continue; - if (aconnector->base.state && aconnector->base.state->commit) { - ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ); + if (!aconnector->base.index) + continue; - if (ret == 0) { - DRM_ERROR("HDCP state unknown! Setting it to DESIRED"); - hdcp_work->encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + connector = &aconnector->base; + + /* check if display connected */ + if (connector->status != connector_status_connected) + continue; + + conn_state = aconnector->base.state; + + if (!conn_state) + continue; + + dev = connector->dev; + + if (!dev) + continue; + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + mutex_lock(&hdcp_work->mutex); + + if (conn_state->commit) { + ret = wait_for_completion_interruptible_timeout( + &conn_state->commit->hw_done, 10 * HZ); + if (ret == 0) { + DRM_ERROR( + "HDCP state unknown! Setting it to DESIRED"); + hdcp_work->encryption_status[conn_index] = + MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + } } - } - - if (aconnector->base.state) { - if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { - if (aconnector->base.state->hdcp_content_type == + if (hdcp_work->encryption_status[conn_index] != + MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { + if (conn_state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 && - hdcp_work->encryption_status <= - MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) - drm_hdcp_update_content_protection(&aconnector->base, - DRM_MODE_CONTENT_PROTECTION_ENABLED); - else if (aconnector->base.state->hdcp_content_type == - DRM_MODE_HDCP_CONTENT_TYPE1 && - hdcp_work->encryption_status == - MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) - drm_hdcp_update_content_protection(&aconnector->base, - DRM_MODE_CONTENT_PROTECTION_ENABLED); - } else { - drm_hdcp_update_content_protection(&aconnector->base, - DRM_MODE_CONTENT_PROTECTION_DESIRED); - } - } + hdcp_work->encryption_status[conn_index] <= + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) { - mutex_unlock(&hdcp_work->mutex); - drm_modeset_unlock(&dev->mode_config.connection_mutex); + DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_ENABLED\n"); + drm_hdcp_update_content_protection( + connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + } else if (conn_state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE1 && + hdcp_work->encryption_status[conn_index] == + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) { + drm_hdcp_update_content_protection( + connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + } + } else { + DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_DESIRED\n"); + drm_hdcp_update_content_protection( + connector, DRM_MODE_CONTENT_PROTECTION_DESIRED); + + } + mutex_unlock(&hdcp_work->mutex); + drm_modeset_unlock(&dev->mode_config.connection_mutex); + } } static void event_property_validate(struct work_struct *work) @@ -340,19 +381,51 @@ static void event_property_validate(struct work_struct *work) struct hdcp_workqueue *hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue, property_validate_dwork); struct mod_hdcp_display_query query; - struct amdgpu_dm_connector *aconnector = hdcp_work->aconnector; - - if (!aconnector) - return; + struct amdgpu_dm_connector *aconnector; + unsigned int conn_index; mutex_lock(&hdcp_work->mutex); - query.encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; - mod_hdcp_query_display(&hdcp_work->hdcp, aconnector->base.index, &query); + for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; + conn_index++) { + aconnector = hdcp_work->aconnector[conn_index]; - if (query.encryption_status != hdcp_work->encryption_status) { - hdcp_work->encryption_status = query.encryption_status; - schedule_work(&hdcp_work->property_update_work); + + if (!aconnector) + continue; + + if (!aconnector->base.index) + continue; + + /* check if display connected */ + if (aconnector->base.status != connector_status_connected) + continue; + + if (!aconnector->base.state) + continue; + + query.encryption_status = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + mod_hdcp_query_display(&hdcp_work->hdcp, aconnector->base.index, + &query); + + DRM_DEBUG_DRIVER("[HDCP_DM] disp %d, connector->CP %u, (query, work): (%d, %d)\n", + aconnector->base.index, + aconnector->base.state->content_protection, + query.encryption_status, + hdcp_work->encryption_status[conn_index]); + + if (query.encryption_status != + hdcp_work->encryption_status[conn_index]) { + DRM_DEBUG_DRIVER("[HDCP_DM] encryption_status change from %x to %x\n", + hdcp_work->encryption_status[conn_index], query.encryption_status); + + hdcp_work->encryption_status[conn_index] = + query.encryption_status; + + DRM_DEBUG_DRIVER("[HDCP_DM] trigger property_update_work\n"); + + schedule_work(&hdcp_work->property_update_work); + } } mutex_unlock(&hdcp_work->mutex); @@ -687,6 +760,13 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c; hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd; hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd; + + memset(hdcp_work[i].aconnector, 0, + sizeof(struct amdgpu_dm_connector *) * + AMDGPU_DM_MAX_DISPLAY_INDEX); + memset(hdcp_work[i].encryption_status, 0, + sizeof(enum mod_hdcp_encryption_status) * + AMDGPU_DM_MAX_DISPLAY_INDEX); } cp_psp->funcs.update_stream_config = update_config; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h index bbbf7d0eff82..69b445b011c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h @@ -43,7 +43,7 @@ struct hdcp_workqueue { struct delayed_work callback_dwork; struct delayed_work watchdog_timer_dwork; struct delayed_work property_validate_dwork; - struct amdgpu_dm_connector *aconnector; + struct amdgpu_dm_connector *aconnector[AMDGPU_DM_MAX_DISPLAY_INDEX]; struct mutex mutex; struct mod_hdcp hdcp; @@ -51,8 +51,7 @@ struct hdcp_workqueue { struct mod_hdcp_display display; struct mod_hdcp_link link; - enum mod_hdcp_encryption_status encryption_status; - + enum mod_hdcp_encryption_status encryption_status[AMDGPU_DM_MAX_DISPLAY_INDEX]; /* when display is unplugged from mst hub, connctor will be * destroyed within dm_dp_mst_connector_destroy. connector * hdcp perperties, like type, undesired, desired, enabled, From e56b7400e9d7c28431aa7ede8967e9e79c31aed9 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 13 Jul 2023 17:10:57 +0530 Subject: [PATCH 91/97] drm/amd/display: Clean up style problems in amdgpu_dm_hdcp.c [ Upstream commit a19de9dbb4d293c064b02cec8ef134cb9812d639 ] Conform to Linux kernel coding style. And promote sysfs entry for set/get srm to kdoc. Suggested-by: Rodrigo Siqueira Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher Stable-dep-of: be593d9d91c5 ("drm/amd/display: Fix slab-use-after-free in hdcp") Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 185 +++++++++--------- 1 file changed, 89 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 7fc26ca30dcd..15537f554ca8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -39,10 +39,10 @@ static bool lp_write_i2c(void *handle, uint32_t address, const uint8_t *data, uint32_t size) { - struct dc_link *link = handle; struct i2c_payload i2c_payloads[] = {{true, address, size, (void *)data} }; - struct i2c_command cmd = {i2c_payloads, 1, I2C_COMMAND_ENGINE_HW, link->dc->caps.i2c_speed_in_khz}; + struct i2c_command cmd = {i2c_payloads, 1, I2C_COMMAND_ENGINE_HW, + link->dc->caps.i2c_speed_in_khz}; return dm_helpers_submit_i2c(link->ctx, link, &cmd); } @@ -52,8 +52,10 @@ lp_read_i2c(void *handle, uint32_t address, uint8_t offset, uint8_t *data, uint3 { struct dc_link *link = handle; - struct i2c_payload i2c_payloads[] = {{true, address, 1, &offset}, {false, address, size, data} }; - struct i2c_command cmd = {i2c_payloads, 2, I2C_COMMAND_ENGINE_HW, link->dc->caps.i2c_speed_in_khz}; + struct i2c_payload i2c_payloads[] = {{true, address, 1, &offset}, + {false, address, size, data} }; + struct i2c_command cmd = {i2c_payloads, 2, I2C_COMMAND_ENGINE_HW, + link->dc->caps.i2c_speed_in_khz}; return dm_helpers_submit_i2c(link->ctx, link, &cmd); } @@ -76,7 +78,6 @@ lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size) static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size) { - struct ta_hdcp_shared_memory *hdcp_cmd; if (!psp->hdcp_context.context.initialized) { @@ -96,13 +97,12 @@ static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint *srm_version = hdcp_cmd->out_msg.hdcp_get_srm.srm_version; *srm_size = hdcp_cmd->out_msg.hdcp_get_srm.srm_buf_size; - return hdcp_cmd->out_msg.hdcp_get_srm.srm_buf; } -static int psp_set_srm(struct psp_context *psp, uint8_t *srm, uint32_t srm_size, uint32_t *srm_version) +static int psp_set_srm(struct psp_context *psp, + u8 *srm, uint32_t srm_size, uint32_t *srm_version) { - struct ta_hdcp_shared_memory *hdcp_cmd; if (!psp->hdcp_context.context.initialized) { @@ -119,7 +119,8 @@ static int psp_set_srm(struct psp_context *psp, uint8_t *srm, uint32_t srm_size, psp_hdcp_invoke(psp, hdcp_cmd->cmd_id); - if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS || hdcp_cmd->out_msg.hdcp_set_srm.valid_signature != 1 || + if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS || + hdcp_cmd->out_msg.hdcp_set_srm.valid_signature != 1 || hdcp_cmd->out_msg.hdcp_set_srm.srm_version == PSP_SRM_VERSION_MAX) return -EINVAL; @@ -150,7 +151,6 @@ static void process_output(struct hdcp_workqueue *hdcp_work) static void link_lock(struct hdcp_workqueue *work, bool lock) { - int i = 0; for (i = 0; i < work->max_link; i++) { @@ -160,10 +160,11 @@ static void link_lock(struct hdcp_workqueue *work, bool lock) mutex_unlock(&work[i].mutex); } } + void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index, struct amdgpu_dm_connector *aconnector, - uint8_t content_type, + u8 content_type, bool enable_encryption) { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -178,18 +179,19 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, query.display = NULL; mod_hdcp_query_display(&hdcp_w->hdcp, aconnector->base.index, &query); - if (query.display != NULL) { + if (query.display) { memcpy(display, query.display, sizeof(struct mod_hdcp_display)); mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0; if (enable_encryption) { - /* Explicitly set the saved SRM as sysfs call will be after we already enabled hdcp - * (s3 resume case) + /* Explicitly set the saved SRM as sysfs call will be after + * we already enabled hdcp (s3 resume case) */ if (hdcp_work->srm_size > 0) - psp_set_srm(hdcp_work->hdcp.config.psp.handle, hdcp_work->srm, hdcp_work->srm_size, + psp_set_srm(hdcp_work->hdcp.config.psp.handle, hdcp_work->srm, + hdcp_work->srm_size, &hdcp_work->srm_version); display->adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE; @@ -219,7 +221,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, } static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, - unsigned int link_index, + unsigned int link_index, struct amdgpu_dm_connector *aconnector) { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -238,7 +240,8 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP 2 -> 1, type %u, DPMS %u\n", - aconnector->base.index, conn_state->hdcp_content_type, aconnector->base.dpms); + aconnector->base.index, conn_state->hdcp_content_type, + aconnector->base.dpms); } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); @@ -246,6 +249,7 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, process_output(hdcp_w); mutex_unlock(&hdcp_w->mutex); } + void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -274,15 +278,12 @@ void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index schedule_work(&hdcp_w->cpirq_work); } - - - static void event_callback(struct work_struct *work) { struct hdcp_workqueue *hdcp_work; hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue, - callback_dwork); + callback_dwork); mutex_lock(&hdcp_work->mutex); @@ -294,13 +295,12 @@ static void event_callback(struct work_struct *work) process_output(hdcp_work); mutex_unlock(&hdcp_work->mutex); - - } static void event_property_update(struct work_struct *work) { - struct hdcp_workqueue *hdcp_work = container_of(work, struct hdcp_workqueue, property_update_work); + struct hdcp_workqueue *hdcp_work = container_of(work, struct hdcp_workqueue, + property_update_work); struct amdgpu_dm_connector *aconnector = NULL; struct drm_device *dev; long ret; @@ -337,11 +337,10 @@ static void event_property_update(struct work_struct *work) mutex_lock(&hdcp_work->mutex); if (conn_state->commit) { - ret = wait_for_completion_interruptible_timeout( - &conn_state->commit->hw_done, 10 * HZ); + ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done, + 10 * HZ); if (ret == 0) { - DRM_ERROR( - "HDCP state unknown! Setting it to DESIRED"); + DRM_ERROR("HDCP state unknown! Setting it to DESIRED\n"); hdcp_work->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; } @@ -352,24 +351,20 @@ static void event_property_update(struct work_struct *work) DRM_MODE_HDCP_CONTENT_TYPE0 && hdcp_work->encryption_status[conn_index] <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) { - DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_ENABLED\n"); - drm_hdcp_update_content_protection( - connector, - DRM_MODE_CONTENT_PROTECTION_ENABLED); + drm_hdcp_update_content_protection(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED); } else if (conn_state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 && hdcp_work->encryption_status[conn_index] == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) { - drm_hdcp_update_content_protection( - connector, - DRM_MODE_CONTENT_PROTECTION_ENABLED); + drm_hdcp_update_content_protection(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED); } } else { DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_DESIRED\n"); - drm_hdcp_update_content_protection( - connector, DRM_MODE_CONTENT_PROTECTION_DESIRED); - + drm_hdcp_update_content_protection(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED); } mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); @@ -409,7 +404,7 @@ static void event_property_validate(struct work_struct *work) &query); DRM_DEBUG_DRIVER("[HDCP_DM] disp %d, connector->CP %u, (query, work): (%d, %d)\n", - aconnector->base.index, + aconnector->base.index, aconnector->base.state->content_protection, query.encryption_status, hdcp_work->encryption_status[conn_index]); @@ -417,7 +412,8 @@ static void event_property_validate(struct work_struct *work) if (query.encryption_status != hdcp_work->encryption_status[conn_index]) { DRM_DEBUG_DRIVER("[HDCP_DM] encryption_status change from %x to %x\n", - hdcp_work->encryption_status[conn_index], query.encryption_status); + hdcp_work->encryption_status[conn_index], + query.encryption_status); hdcp_work->encryption_status[conn_index] = query.encryption_status; @@ -436,7 +432,7 @@ static void event_watchdog_timer(struct work_struct *work) struct hdcp_workqueue *hdcp_work; hdcp_work = container_of(to_delayed_work(work), - struct hdcp_workqueue, + struct hdcp_workqueue, watchdog_timer_dwork); mutex_lock(&hdcp_work->mutex); @@ -450,7 +446,6 @@ static void event_watchdog_timer(struct work_struct *work) process_output(hdcp_work); mutex_unlock(&hdcp_work->mutex); - } static void event_cpirq(struct work_struct *work) @@ -466,10 +461,8 @@ static void event_cpirq(struct work_struct *work) process_output(hdcp_work); mutex_unlock(&hdcp_work->mutex); - } - void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) { int i = 0; @@ -486,10 +479,8 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) kfree(hdcp_work); } - static bool enable_assr(void *handle, struct dc_link *link) { - struct hdcp_workqueue *hdcp_work = handle; struct mod_hdcp hdcp = hdcp_work->hdcp; struct psp_context *psp = hdcp.config.psp.handle; @@ -507,7 +498,8 @@ static bool enable_assr(void *handle, struct dc_link *link) memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE; - dtm_cmd->dtm_in_message.topology_assr_enable.display_topology_dig_be_index = link->link_enc_hw_inst; + dtm_cmd->dtm_in_message.topology_assr_enable.display_topology_dig_be_index = + link->link_enc_hw_inst; dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE; psp_dtm_invoke(psp, dtm_cmd->cmd_id); @@ -549,7 +541,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) else if (aconnector->dc_em_sink) sink = aconnector->dc_em_sink; - if (sink != NULL) + if (sink) link->mode = mod_hdcp_signal_type_to_operation_mode(sink->sink_signal); display->controller = CONTROLLER_ID_D0 + config->otg_inst; @@ -574,16 +566,20 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) conn_state = aconnector->base.state; DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP %d, type %d\n", aconnector->base.index, - (!!aconnector->base.state) ? aconnector->base.state->content_protection : -1, - (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); + (!!aconnector->base.state) ? + aconnector->base.state->content_protection : -1, + (!!aconnector->base.state) ? + aconnector->base.state->hdcp_content_type : -1); if (conn_state) hdcp_update_display(hdcp_work, link_index, aconnector, - conn_state->hdcp_content_type, false); + conn_state->hdcp_content_type, false); } - -/* NOTE: From the usermodes prospective you only need to call write *ONCE*, the kernel +/** + * DOC: Add sysfs interface for set/get srm + * + * NOTE: From the usermodes prospective you only need to call write *ONCE*, the kernel * will automatically call once or twice depending on the size * * call: "cat file > /sys/class/drm/card0/device/hdcp_srm" from usermode no matter what the size is @@ -594,23 +590,23 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) * sysfs interface doesn't tell us the size we will get so we are sending partial SRMs to psp and on * the last call we will send the full SRM. PSP will fail on every call before the last. * - * This means we don't know if the SRM is good until the last call. And because of this limitation we - * cannot throw errors early as it will stop the kernel from writing to sysfs + * This means we don't know if the SRM is good until the last call. And because of this + * limitation we cannot throw errors early as it will stop the kernel from writing to sysfs * * Example 1: - * Good SRM size = 5096 - * first call to write 4096 -> PSP fails - * Second call to write 1000 -> PSP Pass -> SRM is set + * Good SRM size = 5096 + * first call to write 4096 -> PSP fails + * Second call to write 1000 -> PSP Pass -> SRM is set * * Example 2: - * Bad SRM size = 4096 - * first call to write 4096 -> PSP fails (This is the same as above, but we don't know if this - * is the last call) + * Bad SRM size = 4096 + * first call to write 4096 -> PSP fails (This is the same as above, but we don't know if this + * is the last call) * * Solution?: - * 1: Parse the SRM? -> It is signed so we don't know the EOF - * 2: We can have another sysfs that passes the size before calling set. -> simpler solution - * below + * 1: Parse the SRM? -> It is signed so we don't know the EOF + * 2: We can have another sysfs that passes the size before calling set. -> simpler solution + * below * * Easy Solution: * Always call get after Set to verify if set was successful. @@ -619,20 +615,21 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) * +----------------------+ * PSP will only update its srm if its older than the one we are trying to load. * Always do set first than get. - * -if we try to "1. SET" a older version PSP will reject it and we can "2. GET" the newer - * version and save it + * -if we try to "1. SET" a older version PSP will reject it and we can "2. GET" the newer + * version and save it * - * -if we try to "1. SET" a newer version PSP will accept it and we can "2. GET" the - * same(newer) version back and save it + * -if we try to "1. SET" a newer version PSP will accept it and we can "2. GET" the + * same(newer) version back and save it * - * -if we try to "1. SET" a newer version and PSP rejects it. That means the format is - * incorrect/corrupted and we should correct our SRM by getting it from PSP + * -if we try to "1. SET" a newer version and PSP rejects it. That means the format is + * incorrect/corrupted and we should correct our SRM by getting it from PSP */ -static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, +static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct hdcp_workqueue *work; - uint32_t srm_version = 0; + u32 srm_version = 0; work = container_of(bin_attr, struct hdcp_workqueue, attr); link_lock(work, true); @@ -646,19 +643,19 @@ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, struct bi work->srm_version = srm_version; } - link_lock(work, false); return count; } -static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, +static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct hdcp_workqueue *work; - uint8_t *srm = NULL; - uint32_t srm_version; - uint32_t srm_size; + u8 *srm = NULL; + u32 srm_version; + u32 srm_size; size_t ret = count; work = container_of(bin_attr, struct hdcp_workqueue, attr); @@ -691,12 +688,12 @@ ret: /* From the hdcp spec (5.Renewability) SRM needs to be stored in a non-volatile memory. * * For example, - * if Application "A" sets the SRM (ver 2) and we reboot/suspend and later when Application "B" - * needs to use HDCP, the version in PSP should be SRM(ver 2). So SRM should be persistent - * across boot/reboots/suspend/resume/shutdown + * if Application "A" sets the SRM (ver 2) and we reboot/suspend and later when Application "B" + * needs to use HDCP, the version in PSP should be SRM(ver 2). So SRM should be persistent + * across boot/reboots/suspend/resume/shutdown * - * Currently when the system goes down (suspend/shutdown) the SRM is cleared from PSP. For HDCP we need - * to make the SRM persistent. + * Currently when the system goes down (suspend/shutdown) the SRM is cleared from PSP. For HDCP + * we need to make the SRM persistent. * * -PSP owns the checking of SRM but doesn't have the ability to store it in a non-volatile memory. * -The kernel cannot write to the file systems. @@ -706,8 +703,8 @@ ret: * * Usermode can read/write to/from PSP using the sysfs interface * For example: - * to save SRM from PSP to storage : cat /sys/class/drm/card0/device/hdcp_srm > srmfile - * to load from storage to PSP: cat srmfile > /sys/class/drm/card0/device/hdcp_srm + * to save SRM from PSP to storage : cat /sys/class/drm/card0/device/hdcp_srm > srmfile + * to load from storage to PSP: cat srmfile > /sys/class/drm/card0/device/hdcp_srm */ static const struct bin_attribute data_attr = { .attr = {.name = "hdcp_srm", .mode = 0664}, @@ -716,10 +713,9 @@ static const struct bin_attribute data_attr = { .read = srm_data_read, }; - -struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct cp_psp *cp_psp, struct dc *dc) +struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, + struct cp_psp *cp_psp, struct dc *dc) { - int max_caps = dc->caps.max_links; struct hdcp_workqueue *hdcp_work; int i = 0; @@ -728,14 +724,16 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct if (ZERO_OR_NULL_PTR(hdcp_work)) return NULL; - hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL); + hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, + sizeof(*hdcp_work->srm), GFP_KERNEL); - if (hdcp_work->srm == NULL) + if (!hdcp_work->srm) goto fail_alloc_context; - hdcp_work->srm_temp = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm_temp), GFP_KERNEL); + hdcp_work->srm_temp = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, + sizeof(*hdcp_work->srm_temp), GFP_KERNEL); - if (hdcp_work->srm_temp == NULL) + if (!hdcp_work->srm_temp) goto fail_alloc_context; hdcp_work->max_link = max_caps; @@ -788,10 +786,5 @@ fail_alloc_context: kfree(hdcp_work); return NULL; - - - } - - From e07ed98515836fbbdc88df85c9618d9f67a74226 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Mon, 24 Jul 2023 16:32:47 -0400 Subject: [PATCH 92/97] drm/amd/display: Change HDCP update sequence for DM [ Upstream commit 393e83484839970e4975dfa1f0666f939a6f3e3d ] Refactor the sequence in hdcp_update_display() to use mod_hdcp_update_display(). Previous sequence: - remove()->add() This Sequence was used to update the display, (mod_hdcp_update_display didn't exist at the time). This meant for any hdcp updates (type changes, enable/disable) we would remove, reconstruct, and add. This leads to unnecessary calls to psp eventually New Sequence using mod_hdcp_update_display(): - add() once when stream is enabled - use update() for all updates The update function checks for prev == new states and will not unnecessarily end up calling psp via add/remove. Reviewed-by: Qingqing Zhuo Acked-by: Tom Chung Signed-off-by: Bhawanpreet Lakha Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: be593d9d91c5 ("drm/amd/display: Fix slab-use-after-free in hdcp") Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 74 +++++++++---------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 15537f554ca8..7c67bb771f99 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -168,53 +168,45 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, bool enable_encryption) { struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; - struct mod_hdcp_display *display = &hdcp_work[link_index].display; - struct mod_hdcp_link *link = &hdcp_work[link_index].link; - struct mod_hdcp_display_query query; + struct mod_hdcp_link_adjustment link_adjust; + struct mod_hdcp_display_adjustment display_adjust; unsigned int conn_index = aconnector->base.index; mutex_lock(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; - query.display = NULL; - mod_hdcp_query_display(&hdcp_w->hdcp, aconnector->base.index, &query); + memset(&link_adjust, 0, sizeof(link_adjust)); + memset(&display_adjust, 0, sizeof(display_adjust)); - if (query.display) { - memcpy(display, query.display, sizeof(struct mod_hdcp_display)); - mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); + if (enable_encryption) { + /* Explicitly set the saved SRM as sysfs call will be after we already enabled hdcp + * (s3 resume case) + */ + if (hdcp_work->srm_size > 0) + psp_set_srm(hdcp_work->hdcp.config.psp.handle, hdcp_work->srm, + hdcp_work->srm_size, + &hdcp_work->srm_version); - hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0; + display_adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE; - if (enable_encryption) { - /* Explicitly set the saved SRM as sysfs call will be after - * we already enabled hdcp (s3 resume case) - */ - if (hdcp_work->srm_size > 0) - psp_set_srm(hdcp_work->hdcp.config.psp.handle, hdcp_work->srm, - hdcp_work->srm_size, - &hdcp_work->srm_version); + link_adjust.auth_delay = 2; - display->adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE; - if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) { - hdcp_w->link.adjust.hdcp1.disable = 0; - hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0; - } else if (content_type == DRM_MODE_HDCP_CONTENT_TYPE1) { - hdcp_w->link.adjust.hdcp1.disable = 1; - hdcp_w->link.adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_1; - } - - schedule_delayed_work(&hdcp_w->property_validate_dwork, - msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS)); - } else { - display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; - hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; - cancel_delayed_work(&hdcp_w->property_validate_dwork); + if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) { + link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0; + } else if (content_type == DRM_MODE_HDCP_CONTENT_TYPE1) { + link_adjust.hdcp1.disable = 1; + link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_1; } - display->state = MOD_HDCP_DISPLAY_ACTIVE; + schedule_delayed_work(&hdcp_w->property_validate_dwork, + msecs_to_jiffies(DRM_HDCP_CHECK_PERIOD_MS)); + } else { + display_adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; + hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + cancel_delayed_work(&hdcp_w->property_validate_dwork); } - mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); + mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output); process_output(hdcp_w); mutex_unlock(&hdcp_w->mutex); @@ -521,7 +513,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) int link_index = aconnector->dc_link->link_index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; - struct drm_connector_state *conn_state; + struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; struct dc_sink *sink = NULL; bool link_is_hdcp14 = false; @@ -563,7 +555,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; link->adjust.auth_delay = 3; link->adjust.hdcp1.disable = 0; - conn_state = aconnector->base.state; + hdcp_w->encryption_status[display->index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP %d, type %d\n", aconnector->base.index, (!!aconnector->base.state) ? @@ -571,9 +563,13 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); - if (conn_state) - hdcp_update_display(hdcp_work, link_index, aconnector, - conn_state->hdcp_content_type, false); + mutex_lock(&hdcp_w->mutex); + + mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); + + process_output(hdcp_w); + mutex_unlock(&hdcp_w->mutex); + } /** From 942ecb9e8f8544796ac8041895028b0c4d0f7b51 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:30:01 -0600 Subject: [PATCH 93/97] drm/amd/display: Add scoped mutexes for amdgpu_dm_dhcp [ Upstream commit 6b675ab8efbf2bcee25be29e865455c56e246401 ] [Why] Guards automatically release mutex when it goes out of scope making code easier to follow. [How] Replace all use of mutex_lock()/mutex_unlock() with guard(mutex). Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: be593d9d91c5 ("drm/amd/display: Fix slab-use-after-free in hdcp") Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 37 +++++-------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 7c67bb771f99..6222d5a16883 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -172,7 +172,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, struct mod_hdcp_display_adjustment display_adjust; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -209,7 +209,6 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, @@ -220,7 +219,7 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, struct drm_connector_state *conn_state = aconnector->base.state; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and @@ -239,7 +238,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -247,7 +245,7 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; unsigned int conn_index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output); @@ -259,8 +257,6 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde } process_output(hdcp_w); - - mutex_unlock(&hdcp_w->mutex); } void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -277,7 +273,7 @@ static void event_callback(struct work_struct *work) hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue, callback_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->callback_dwork); @@ -285,8 +281,6 @@ static void event_callback(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_property_update(struct work_struct *work) @@ -326,7 +320,7 @@ static void event_property_update(struct work_struct *work) continue; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); if (conn_state->commit) { ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done, @@ -358,7 +352,6 @@ static void event_property_update(struct work_struct *work) drm_hdcp_update_content_protection(connector, DRM_MODE_CONTENT_PROTECTION_DESIRED); } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } } @@ -371,7 +364,7 @@ static void event_property_validate(struct work_struct *work) struct amdgpu_dm_connector *aconnector; unsigned int conn_index; - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { @@ -415,8 +408,6 @@ static void event_property_validate(struct work_struct *work) schedule_work(&hdcp_work->property_update_work); } } - - mutex_unlock(&hdcp_work->mutex); } static void event_watchdog_timer(struct work_struct *work) @@ -427,7 +418,7 @@ static void event_watchdog_timer(struct work_struct *work) struct hdcp_workqueue, watchdog_timer_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->watchdog_timer_dwork); @@ -436,8 +427,6 @@ static void event_watchdog_timer(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_cpirq(struct work_struct *work) @@ -446,13 +435,11 @@ static void event_cpirq(struct work_struct *work) hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) @@ -486,7 +473,7 @@ static bool enable_assr(void *handle, struct dc_link *link) dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; - mutex_lock(&psp->dtm_context.mutex); + guard(mutex)(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE; @@ -501,8 +488,6 @@ static bool enable_assr(void *handle, struct dc_link *link) res = false; } - mutex_unlock(&psp->dtm_context.mutex); - return res; } @@ -563,13 +548,11 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); - } /** From e25139c4aa5621f2db8e86688c33546cdd885e42 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 17 Apr 2025 16:50:05 -0500 Subject: [PATCH 94/97] drm/amd/display: Fix slab-use-after-free in hdcp [ Upstream commit be593d9d91c5a3a363d456b9aceb71029aeb3f1d ] The HDCP code in amdgpu_dm_hdcp.c copies pointers to amdgpu_dm_connector objects without incrementing the kref reference counts. When using a USB-C dock, and the dock is unplugged, the corresponding amdgpu_dm_connector objects are freed, creating dangling pointers in the HDCP code. When the dock is plugged back, the dangling pointers are dereferenced, resulting in a slab-use-after-free: [ 66.775837] BUG: KASAN: slab-use-after-free in event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776171] Read of size 4 at addr ffff888127804120 by task kworker/0:1/10 [ 66.776179] CPU: 0 UID: 0 PID: 10 Comm: kworker/0:1 Not tainted 6.14.0-rc7-00180-g54505f727a38-dirty #233 [ 66.776183] Hardware name: HP HP Pavilion Aero Laptop 13-be0xxx/8916, BIOS F.17 12/18/2024 [ 66.776186] Workqueue: events event_property_validate [amdgpu] [ 66.776494] Call Trace: [ 66.776496] [ 66.776497] dump_stack_lvl+0x70/0xa0 [ 66.776504] print_report+0x175/0x555 [ 66.776507] ? __virt_addr_valid+0x243/0x450 [ 66.776510] ? kasan_complete_mode_report_info+0x66/0x1c0 [ 66.776515] kasan_report+0xeb/0x1c0 [ 66.776518] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776819] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777121] __asan_report_load4_noabort+0x14/0x20 [ 66.777124] event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777342] ? __lock_acquire+0x6b40/0x6b40 [ 66.777347] ? enable_assr+0x250/0x250 [amdgpu] [ 66.777571] process_one_work+0x86b/0x1510 [ 66.777575] ? pwq_dec_nr_in_flight+0xcf0/0xcf0 [ 66.777578] ? assign_work+0x16b/0x280 [ 66.777580] ? lock_is_held_type+0xa3/0x130 [ 66.777583] worker_thread+0x5c0/0xfa0 [ 66.777587] ? process_one_work+0x1510/0x1510 [ 66.777588] kthread+0x3a2/0x840 [ 66.777591] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777594] ? trace_hardirqs_on+0x4f/0x60 [ 66.777597] ? _raw_spin_unlock_irq+0x27/0x60 [ 66.777599] ? calculate_sigpending+0x77/0xa0 [ 66.777602] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777605] ret_from_fork+0x40/0x90 [ 66.777607] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777609] ret_from_fork_asm+0x11/0x20 [ 66.777614] [ 66.777643] Allocated by task 10: [ 66.777646] kasan_save_stack+0x39/0x60 [ 66.777649] kasan_save_track+0x14/0x40 [ 66.777652] kasan_save_alloc_info+0x37/0x50 [ 66.777655] __kasan_kmalloc+0xbb/0xc0 [ 66.777658] __kmalloc_cache_noprof+0x1c8/0x4b0 [ 66.777661] dm_dp_add_mst_connector+0xdd/0x5c0 [amdgpu] [ 66.777880] drm_dp_mst_port_add_connector+0x47e/0x770 [drm_display_helper] [ 66.777892] drm_dp_send_link_address+0x1554/0x2bf0 [drm_display_helper] [ 66.777901] drm_dp_check_and_send_link_address+0x187/0x1f0 [drm_display_helper] [ 66.777909] drm_dp_mst_link_probe_work+0x2b8/0x410 [drm_display_helper] [ 66.777917] process_one_work+0x86b/0x1510 [ 66.777919] worker_thread+0x5c0/0xfa0 [ 66.777922] kthread+0x3a2/0x840 [ 66.777925] ret_from_fork+0x40/0x90 [ 66.777927] ret_from_fork_asm+0x11/0x20 [ 66.777932] Freed by task 1713: [ 66.777935] kasan_save_stack+0x39/0x60 [ 66.777938] kasan_save_track+0x14/0x40 [ 66.777940] kasan_save_free_info+0x3b/0x60 [ 66.777944] __kasan_slab_free+0x52/0x70 [ 66.777946] kfree+0x13f/0x4b0 [ 66.777949] dm_dp_mst_connector_destroy+0xfa/0x150 [amdgpu] [ 66.778179] drm_connector_free+0x7d/0xb0 [ 66.778184] drm_mode_object_put.part.0+0xee/0x160 [ 66.778188] drm_mode_object_put+0x37/0x50 [ 66.778191] drm_atomic_state_default_clear+0x220/0xd60 [ 66.778194] __drm_atomic_state_free+0x16e/0x2a0 [ 66.778197] drm_mode_atomic_ioctl+0x15ed/0x2ba0 [ 66.778200] drm_ioctl_kernel+0x17a/0x310 [ 66.778203] drm_ioctl+0x584/0xd10 [ 66.778206] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] [ 66.778375] __x64_sys_ioctl+0x139/0x1a0 [ 66.778378] x64_sys_call+0xee7/0xfb0 [ 66.778381] do_syscall_64+0x87/0x140 [ 66.778385] entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fix this by properly incrementing and decrementing the reference counts when making and deleting copies of the amdgpu_dm_connector pointers. (Mario: rebase on current code and update fixes tag) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Signed-off-by: Chris Bainbridge Fixes: da3fd7ac0bcf3 ("drm/amd/display: Update CP property based on HW query") Reviewed-by: Alex Hung Link: https://lore.kernel.org/r/20250417215005.37964-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit d4673f3c3b3dcb74e36e53cdfc880baa7a87b330) Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 6222d5a16883..6110d88efdbb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -496,6 +505,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -551,7 +561,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } From 79ff5e26295640d5b2b8767eeafe49b6e064ab8c Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 31 Jul 2024 13:12:58 -0600 Subject: [PATCH 95/97] ASoC: Use of_property_read_bool() [ Upstream commit 69dd15a8ef0ae494179fd15023aa8172188db6b7 ] Use of_property_read_bool() to read boolean properties rather than of_get_property(). This is part of a larger effort to remove callers of of_get_property() and similar functions. of_get_property() leaks the DT property data pointer which is a problem for dynamically allocated nodes which may be freed. Signed-off-by: Rob Herring (Arm) Link: https://patch.msgid.link/20240731191312.1710417-20-robh@kernel.org Signed-off-by: Mark Brown Stable-dep-of: 6eab70345799 ("ASoC: soc-core: Stop using of_property_read_bool() for non-boolean properties") Signed-off-by: Sasha Levin --- sound/soc/codecs/ak4613.c | 4 ++-- sound/soc/soc-core.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index f75c19ef3551..3f790d1f11a9 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -840,14 +840,14 @@ static void ak4613_parse_of(struct ak4613_priv *priv, /* Input 1 - 2 */ for (i = 0; i < 2; i++) { snprintf(prop, sizeof(prop), "asahi-kasei,in%d-single-end", i + 1); - if (!of_get_property(np, prop, NULL)) + if (!of_property_read_bool(np, prop)) priv->ic |= 1 << i; } /* Output 1 - 6 */ for (i = 0; i < 6; i++) { snprintf(prop, sizeof(prop), "asahi-kasei,out%d-single-end", i + 1); - if (!of_get_property(np, prop, NULL)) + if (!of_property_read_bool(np, prop)) priv->oc |= 1 << i; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 6a4101dc15a5..58e07296144e 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3143,10 +3143,10 @@ unsigned int snd_soc_daifmt_parse_format(struct device_node *np, * SND_SOC_DAIFMT_INV_MASK area */ snprintf(prop, sizeof(prop), "%sbitclock-inversion", prefix); - bit = !!of_get_property(np, prop, NULL); + bit = of_property_read_bool(np, prop); snprintf(prop, sizeof(prop), "%sframe-inversion", prefix); - frame = !!of_get_property(np, prop, NULL); + frame = of_property_read_bool(np, prop); switch ((bit << 4) + frame) { case 0x11: @@ -3183,12 +3183,12 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, * check "[prefix]frame-master" */ snprintf(prop, sizeof(prop), "%sbitclock-master", prefix); - bit = !!of_get_property(np, prop, NULL); + bit = of_property_read_bool(np, prop); if (bit && bitclkmaster) *bitclkmaster = of_parse_phandle(np, prop, 0); snprintf(prop, sizeof(prop), "%sframe-master", prefix); - frame = !!of_get_property(np, prop, NULL); + frame = of_property_read_bool(np, prop); if (frame && framemaster) *framemaster = of_parse_phandle(np, prop, 0); From 5626b47a3e7240139fb22ba925cd7866f7105e64 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Jan 2025 09:21:27 +0100 Subject: [PATCH 96/97] ASoC: soc-core: Stop using of_property_read_bool() for non-boolean properties [ Upstream commit 6eab7034579917f207ca6d8e3f4e11e85e0ab7d5 ] On R-Car: OF: /sound: Read of boolean property 'simple-audio-card,bitclock-master' with a value. OF: /sound: Read of boolean property 'simple-audio-card,frame-master' with a value. or: OF: /soc/sound@ec500000/ports/port@0/endpoint: Read of boolean property 'bitclock-master' with a value. OF: /soc/sound@ec500000/ports/port@0/endpoint: Read of boolean property 'frame-master' with a value. The use of of_property_read_bool() for non-boolean properties is deprecated in favor of of_property_present() when testing for property presence. Replace testing for presence before calling of_property_read_u32() by testing for an -EINVAL return value from the latter, to simplify the code. Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/db10e96fbda121e7456d70e97a013cbfc9755f4d.1737533954.git.geert+renesas@glider.be Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 58e07296144e..b13370d2ec1d 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2837,7 +2837,7 @@ int snd_soc_of_parse_pin_switches(struct snd_soc_card *card, const char *prop) unsigned int i, nb_controls; int ret; - if (!of_property_read_bool(dev->of_node, prop)) + if (!of_property_present(dev->of_node, prop)) return 0; strings = devm_kcalloc(dev, nb_controls_max, @@ -2911,23 +2911,17 @@ int snd_soc_of_parse_tdm_slot(struct device_node *np, if (rx_mask) snd_soc_of_get_slot_mask(np, "dai-tdm-slot-rx-mask", rx_mask); - if (of_property_read_bool(np, "dai-tdm-slot-num")) { - ret = of_property_read_u32(np, "dai-tdm-slot-num", &val); - if (ret) - return ret; + ret = of_property_read_u32(np, "dai-tdm-slot-num", &val); + if (ret && ret != -EINVAL) + return ret; + if (!ret && slots) + *slots = val; - if (slots) - *slots = val; - } - - if (of_property_read_bool(np, "dai-tdm-slot-width")) { - ret = of_property_read_u32(np, "dai-tdm-slot-width", &val); - if (ret) - return ret; - - if (slot_width) - *slot_width = val; - } + ret = of_property_read_u32(np, "dai-tdm-slot-width", &val); + if (ret && ret != -EINVAL) + return ret; + if (!ret && slot_width) + *slot_width = val; return 0; } @@ -3183,12 +3177,12 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, * check "[prefix]frame-master" */ snprintf(prop, sizeof(prop), "%sbitclock-master", prefix); - bit = of_property_read_bool(np, prop); + bit = of_property_present(np, prop); if (bit && bitclkmaster) *bitclkmaster = of_parse_phandle(np, prop, 0); snprintf(prop, sizeof(prop), "%sframe-master", prefix); - frame = of_property_read_bool(np, prop); + frame = of_property_present(np, prop); if (frame && framemaster) *framemaster = of_parse_phandle(np, prop, 0); From 02b72ccb5f9df707a763d9f7163d7918d3aff0b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 May 2025 09:41:46 +0200 Subject: [PATCH 97/97] Linux 6.1.138 Link: https://lore.kernel.org/r/20250507183806.987408728@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Salvatore Bonaccorso Link: https://lore.kernel.org/r/20250508112609.711621924@linuxfoundation.org Tested-by: Peter Schneider Tested-by: Jon Hunter Tested-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1994bf77e8f..d8d39fe5ef70 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 137 +SUBLEVEL = 138 EXTRAVERSION = NAME = Curry Ramen