From 33a3b7fad8273e7bcbef0f456c2070be8adcad45 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 16 Dec 2022 11:42:20 -0500 Subject: [PATCH 001/207] drm/amdgpu: skip MES for S0ix as well since it's part of GFX commit afa6646b1c5d3affd541f76bd7476e4b835a9174 upstream. It's also part of gfxoff. Cc: stable@vger.kernel.org # 6.0, 6.1 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 913f22d41673..0be85d19a6f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3005,14 +3005,15 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) continue; } - /* skip suspend of gfx and psp for S0ix + /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware * so no need to suspend it. */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)) + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) continue; /* XXX handle errors */ From a354a9e3fc3b5475b80395a3015644e26352223f Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 19 Dec 2022 18:32:32 +0800 Subject: [PATCH 002/207] drm/amdgpu: skip mes self test after s0i3 resume for MES IP v11.0 commit 8660495a9c5b9afeec4cc006b3b75178f0fb2f10 upstream. MES is part of gfxoff and MES suspend and resume are skipped for S0i3. But the mes_self_test call path is still in amdgpu_device_ip_late_init(). It should also be skipped for s0ix as no hardware re-initialization happened.
Besides, mes_self_test will free the BO that triggers a lot of warning messages while in the suspend state. [ 81.656085] WARNING: CPU: 2 PID: 1550 at drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:425 amdgpu_bo_free_kernel+0xfc/0x110 [amdgpu] [ 81.679435] Call Trace: [ 81.679726] [ 81.679981] amdgpu_mes_remove_hw_queue+0x17a/0x230 [amdgpu] [ 81.680857] amdgpu_mes_self_test+0x390/0x430 [amdgpu] [ 81.681665] mes_v11_0_late_init+0x37/0x50 [amdgpu] [ 81.682423] amdgpu_device_ip_late_init+0x53/0x280 [amdgpu] [ 81.683257] amdgpu_device_resume+0xae/0x2a0 [amdgpu] [ 81.684043] amdgpu_pmops_resume+0x37/0x70 [amdgpu] [ 81.684818] pci_pm_resume+0x5c/0xa0 [ 81.685247] ? pci_pm_thaw+0x90/0x90 [ 81.685658] dpm_run_callback+0x4e/0x160 [ 81.686110] device_resume+0xad/0x210 [ 81.686529] async_resume+0x1e/0x40 [ 81.686931] async_run_entry_fn+0x33/0x120 [ 81.687405] process_one_work+0x21d/0x3f0 [ 81.687869] worker_thread+0x4a/0x3c0 [ 81.688293] ? process_one_work+0x3f0/0x3f0 [ 81.688777] kthread+0xff/0x130 [ 81.689157] ? 
kthread_complete_and_exit+0x20/0x20 [ 81.689707] ret_from_fork+0x22/0x30 [ 81.690118] [ 81.690380] ---[ end trace 0000000000000000 ]--- v2: make the comment clean and use adev->in_s0ix instead of adev->suspend Signed-off-by: Tim Huang Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f141fadd2d86..725876b4f02e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1339,7 +1339,8 @@ static int mes_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_in_reset(adev) && + /* it's only intended for use in mes_self_test case, not for s0ix and reset */ + if (!amdgpu_in_reset(adev) && !adev->in_s0ix && (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) amdgpu_mes_self_test(adev); From fab4655990012268e82f830d04e743905fb9ba3d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 24 Oct 2022 17:23:43 +0200 Subject: [PATCH 003/207] media: stv0288: use explicitly signed char commit 7392134428c92a4cb541bd5c8f4f5c8d2e88364d upstream. With char becoming unsigned by default, and with `char` alone being ambiguous and based on architecture, signed chars need to be marked explicitly as such. Use `s8` and `u8` types here, since that's what surrounding code does. This fixes: drivers/media/dvb-frontends/stv0288.c:471 stv0288_set_frontend() warn: assigning (-9) to unsigned variable 'tm' drivers/media/dvb-frontends/stv0288.c:471 stv0288_set_frontend() warn: we never enter this loop Cc: Mauro Carvalho Chehab Cc: linux-media@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/stv0288.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c index 3d54a0ec86af..3ae1f3a2f142 100644 --- a/drivers/media/dvb-frontends/stv0288.c +++ b/drivers/media/dvb-frontends/stv0288.c @@ -440,9 +440,8 @@ static int stv0288_set_frontend(struct dvb_frontend *fe) struct stv0288_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; - char tm; - unsigned char tda[3]; - u8 reg, time_out = 0; + u8 tda[3], reg, time_out = 0; + s8 tm; dprintk("%s : FE_SET_FRONTEND\n", __func__); From 189c499376d5bc6855e187205b96b45785e4131d Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Mon, 7 Nov 2022 21:22:31 +0000 Subject: [PATCH 004/207] cxl/region: Fix memdev reuse check commit f04facfb993de47e2133b2b842d72b97b1c50162 upstream. Due to a typo, the check of whether or not a memdev has already been used as a target for the region (above code piece) will always be skipped. Given a memdev with more than one HDM decoder, an interleaved region can be created that maps multiple HPAs to the same DPA. According to CXL spec 3.0 8.1.3.8.4, "Aliasing (mapping more than one Host Physical Address (HPA) to a single Device Physical Address) is forbidden." Fix this by using existing iterator for memdev reuse check. 
Cc: Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Signed-off-by: Fan Ni Link: https://lore.kernel.org/r/20221107212153.745993-1-fan.ni@samsung.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f9ae5ad284ff..c7152b4bd9eb 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1226,7 +1226,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled_target; struct cxl_memdev *cxlmd_target; - cxled_target = p->targets[pos]; + cxled_target = p->targets[i]; if (!cxled_target) continue; From 15e7433e1dc202516b91e6e2e8c702054d59f30a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 5 Dec 2022 11:08:37 +0100 Subject: [PATCH 005/207] arm64: dts: qcom: sc8280xp: fix UFS DMA coherency commit 0953777640354dc459a22369eea488603d225dd9 upstream. The SC8280XP UFS controllers are cache coherent and must be marked as such in the devicetree to avoid potential data corruption. 
Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Cc: stable@vger.kernel.org # 6.0 Signed-off-by: Johan Hovold Reviewed-by: Manivannan Sadhasivam Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221205100837.29212-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 212d63d5cbf2..510f6d60b45b 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -855,6 +855,7 @@ required-opps = <&rpmhpd_opp_nom>; iommus = <&apps_smmu 0xe0 0x0>; + dma-coherent; clocks = <&gcc GCC_UFS_PHY_AXI_CLK>, <&gcc GCC_AGGRE_UFS_PHY_AXI_CLK>, @@ -923,6 +924,7 @@ power-domains = <&gcc UFS_CARD_GDSC>; iommus = <&apps_smmu 0x4a0 0x0>; + dma-coherent; clocks = <&gcc GCC_UFS_CARD_AXI_CLK>, <&gcc GCC_AGGRE_UFS_CARD_AXI_CLK>, From 50950c0176f62e10cb8029b160c689e77df42ae2 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 2 Dec 2022 11:18:33 +0900 Subject: [PATCH 006/207] arm64: Prohibit instrumentation on arch_stack_walk() commit 0fbcd8abf3375052cc7627cc53aba6f2eb189fbb upstream. Mark arch_stack_walk() as noinstr instead of notrace and inline functions called from arch_stack_walk() as __always_inline so that user does not put any instrumentations on it, because this function can be used from return_address() which is used by lockdep. Without this, if the kernel built with CONFIG_LOCKDEP=y, just probing arch_stack_walk() via /kprobe_events will crash the kernel on arm64. # echo p arch_stack_walk >> ${TRACEFS}/kprobe_events # echo 1 > ${TRACEFS}/events/kprobes/enable kprobes: Failed to recover from reentered kprobes. 
kprobes: Dump kprobe: .symbol_name = arch_stack_walk, .offset = 0, .addr = arch_stack_walk+0x0/0x1c0 ------------[ cut here ]------------ kernel BUG at arch/arm64/kernel/probes/kprobes.c:241! kprobes: Failed to recover from reentered kprobes. kprobes: Dump kprobe: .symbol_name = arch_stack_walk, .offset = 0, .addr = arch_stack_walk+0x0/0x1c0 ------------[ cut here ]------------ kernel BUG at arch/arm64/kernel/probes/kprobes.c:241! PREEMPT SMP Modules linked in: CPU: 0 PID: 17 Comm: migration/0 Tainted: G N 6.1.0-rc5+ #6 Hardware name: linux,dummy-virt (DT) Stopper: 0x0 <- 0x0 pstate: 600003c5 (nZCv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : kprobe_breakpoint_handler+0x178/0x17c lr : kprobe_breakpoint_handler+0x178/0x17c sp : ffff8000080d3090 x29: ffff8000080d3090 x28: ffff0df5845798c0 x27: ffffc4f59057a774 x26: ffff0df5ffbba770 x25: ffff0df58f420f18 x24: ffff49006f641000 x23: ffffc4f590579768 x22: ffff0df58f420f18 x21: ffff8000080d31c0 x20: ffffc4f590579768 x19: ffffc4f590579770 x18: 0000000000000006 x17: 5f6b636174735f68 x16: 637261203d207264 x15: 64612e202c30203d x14: 2074657366666f2e x13: 30633178302f3078 x12: 302b6b6c61775f6b x11: 636174735f686372 x10: ffffc4f590dc5bd8 x9 : ffffc4f58eb31958 x8 : 00000000ffffefff x7 : ffffc4f590dc5bd8 x6 : 80000000fffff000 x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : ffff0df5845798c0 x0 : 0000000000000064 Call trace: kprobes: Failed to recover from reentered kprobes. kprobes: Dump kprobe: .symbol_name = arch_stack_walk, .offset = 0, .addr = arch_stack_walk+0x0/0x1c0 ------------[ cut here ]------------ kernel BUG at arch/arm64/kernel/probes/kprobes.c:241! 
Fixes: 39ef362d2d45 ("arm64: Make return_address() use arch_stack_walk()") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Acked-by: Mark Rutland Link: https://lore.kernel.org/r/166994751368.439920.3236636557520824664.stgit@devnote3 Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/stacktrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 634279b3b03d..117e2c180f3c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -23,8 +23,8 @@ * * The regs must be on a stack currently owned by the calling task. */ -static inline void unwind_init_from_regs(struct unwind_state *state, - struct pt_regs *regs) +static __always_inline void unwind_init_from_regs(struct unwind_state *state, + struct pt_regs *regs) { unwind_init_common(state, current); @@ -58,8 +58,8 @@ static __always_inline void unwind_init_from_caller(struct unwind_state *state) * duration of the unwind, or the unwind will be bogus. It is never valid to * call this for the current task. */ -static inline void unwind_init_from_task(struct unwind_state *state, - struct task_struct *task) +static __always_inline void unwind_init_from_task(struct unwind_state *state, + struct task_struct *task) { unwind_init_common(state, task); @@ -186,7 +186,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) : stackinfo_get_unknown(); \ }) -noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { From 70e71f0dc86f455bc9ad8bd3efeff55c4b0f9931 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 29 Nov 2022 12:41:59 +0530 Subject: [PATCH 007/207] soc: qcom: Select REMAP_MMIO for LLCC driver commit 5d2fe2d7b616b8baa18348ead857b504fc2de336 upstream. 
LLCC driver uses REGMAP_MMIO for accessing the hardware registers. So select the dependency in Kconfig. Without this, there will be errors while building the driver with COMPILE_TEST only: ERROR: modpost: "__devm_regmap_init_mmio_clk" [drivers/soc/qcom/llcc-qcom.ko] undefined! make[1]: *** [scripts/Makefile.modpost:126: Module.symvers] Error 1 make: *** [Makefile:1944: modpost] Error 2 Cc: # 4.19 Fixes: a3134fb09e0b ("drivers: soc: Add LLCC driver") Reported-by: Borislav Petkov Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221129071201.30024-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 024e420f1bb7..a6164b2d7b25 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -63,6 +63,7 @@ config QCOM_GSBI config QCOM_LLCC tristate "Qualcomm Technologies, Inc. LLCC driver" depends on ARCH_QCOM || COMPILE_TEST + select REGMAP_MMIO help Qualcomm Technologies, Inc. platform specific Last Level Cache Controller(LLCC) driver for platforms such as, From ca6536972ea547b630688356e93e71dd8ec79116 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 29 Nov 2022 12:50:22 +0530 Subject: [PATCH 008/207] soc: qcom: Select REMAP_MMIO for ICC_BWMON driver commit a84160fbf4f2c8c5ffa588e19ea8f92eabd7ad17 upstream. ICC_BWMON driver uses REGMAP_MMIO for accessing the hardware registers. So select the dependency in Kconfig. Without this, there will be errors while building the driver with COMPILE_TEST only: ERROR: modpost: "__devm_regmap_init_mmio_clk" [drivers/soc/qcom/icc-bwmon.ko] undefined! 
make[1]: *** [scripts/Makefile.modpost:126: Module.symvers] Error 1 make: *** [Makefile:1944: modpost] Error 2 Cc: # 6.0 Cc: Krzysztof Kozlowski Fixes: b9c2ae6cac40 ("soc: qcom: icc-bwmon: Add bandwidth monitoring driver") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221129072022.41962-1-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index a6164b2d7b25..ae504c43d9e7 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -237,6 +237,7 @@ config QCOM_ICC_BWMON tristate "QCOM Interconnect Bandwidth Monitor driver" depends on ARCH_QCOM || COMPILE_TEST select PM_OPP + select REGMAP_MMIO help Sets up driver monitoring bandwidth on various interconnects and based on that voting for interconnect bandwidth, adjusting their From 178d7cf89191f04cfec10f20d99d06f966e016b0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Nov 2022 17:54:34 -0500 Subject: [PATCH 009/207] kest.pl: Fix grub2 menu handling for rebooting commit 26df05a8c1420ad3de314fdd407e7fc2058cc7aa upstream. grub2 has submenus where to use grub-reboot, it requires: grub-reboot X>Y where X is the main index and Y is the submenu. Thus if you have: menuentry 'Debian GNU/Linux' --class debian --class gnu-linux ... [...] } submenu 'Advanced options for Debian GNU/Linux' $menuentry_id_option ... menuentry 'Debian GNU/Linux, with Linux 6.0.0-4-amd64' --class debian --class gnu-linux ... [...] } menuentry 'Debian GNU/Linux, with Linux 6.0.0-4-amd64 (recovery mode)' --class debian --class gnu-linux ... [...] } menuentry 'Debian GNU/Linux, with Linux test' --class debian --class gnu-linux ... [...] 
} And wanted to boot to the "Linux test" kernel, you need to run: # grub-reboot 1>2 As 1 is the second top menu (the submenu) and 2 is the third of the sub menu entries. Have the grub.cfg parsing for grub2 handle such cases. Cc: stable@vger.kernel.org Fixes: a15ba91361d46 ("ktest: Add support for grub2") Reviewed-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- tools/testing/ktest/ktest.pl | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 09d1578f9d66..28b7f64fa44c 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1963,7 +1963,7 @@ sub run_scp_mod { sub _get_grub_index { - my ($command, $target, $skip) = @_; + my ($command, $target, $skip, $submenu) = @_; return if (defined($grub_number) && defined($last_grub_menu) && $last_grub_menu eq $grub_menu && defined($last_machine) && @@ -1980,11 +1980,16 @@ sub _get_grub_index { my $found = 0; + my $submenu_number = 0; + while () { if (/$target/) { $grub_number++; $found = 1; last; + } elsif (defined($submenu) && /$submenu/) { + $submenu_number++; + $grub_number = -1; } elsif (/$skip/) { $grub_number++; } @@ -1993,6 +1998,9 @@ sub _get_grub_index { dodie "Could not find '$grub_menu' through $command on $machine" if (!$found); + if ($submenu_number > 0) { + $grub_number = "$submenu_number>$grub_number"; + } doprint "$grub_number\n"; $last_grub_menu = $grub_menu; $last_machine = $machine; @@ -2003,6 +2011,7 @@ sub get_grub_index { my $command; my $target; my $skip; + my $submenu; my $grub_menu_qt; if ($reboot_type !~ /^grub/) { @@ -2017,8 +2026,9 @@ sub get_grub_index { $skip = '^\s*title\s'; } elsif ($reboot_type eq "grub2") { $command = "cat $grub_file"; - $target = '^menuentry.*' . $grub_menu_qt; - $skip = '^menuentry\s|^submenu\s'; + $target = '^\s*menuentry.*' . 
$grub_menu_qt; + $skip = '^\s*menuentry'; + $submenu = '^\s*submenu\s'; } elsif ($reboot_type eq "grub2bls") { $command = $grub_bls_get; $target = '^title=.*' . $grub_menu_qt; @@ -2027,7 +2037,7 @@ sub get_grub_index { return; } - _get_grub_index($command, $target, $skip); + _get_grub_index($command, $target, $skip, $submenu); } sub wait_for_input { @@ -2090,7 +2100,7 @@ sub reboot_to { if ($reboot_type eq "grub") { run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) { - run_ssh "$grub_reboot $grub_number"; + run_ssh "$grub_reboot \"'$grub_number'\""; } elsif ($reboot_type eq "syslinux") { run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path"; } elsif (defined $reboot_script) { From 14a6e5330a96ad11b2173fd686cf6daeaeb90a4f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 2 Dec 2022 11:59:36 -0500 Subject: [PATCH 010/207] ktest.pl minconfig: Unset configs instead of just removing them commit ef784eebb56425eed6e9b16e7d47e5c00dcf9c38 upstream. After a full run of a make_min_config test, I noticed there were a lot of CONFIGs still enabled that really should not be. Looking at them, I noticed they were all defined as "default y". The issue is that the test simply removes the config and re-runs make oldconfig, which enables it again because it is set to default 'y'. Instead, explicitly disable the config by writing "# CONFIG_FOO is not set" to the file to keep it from being set again. With this change, one of my box's minconfigs went from 768 configs set, down to 521 configs set.
Link: https://lkml.kernel.org/r/20221202115936.016fce23@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 0a05c769a9de5 ("ktest: Added config_bisect test type") Reviewed-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- tools/testing/ktest/ktest.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 28b7f64fa44c..1737c59e4ff6 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -3778,9 +3778,10 @@ sub test_this_config { # .config to make sure it is missing the config that # we had before my %configs = %min_configs; - delete $configs{$config}; + $configs{$config} = "# $config is not set"; make_new_config ((values %configs), (values %keep_configs)); make_oldconfig; + delete $configs{$config}; undef %configs; assign_configs \%configs, $output_config; From 192db0943fadef77b30fcc27c34a7955b286b8f1 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Tue, 11 Oct 2022 19:33:44 +0800 Subject: [PATCH 011/207] jbd2: use the correct print format commit d87a7b4c77a997d5388566dd511ca8e6b8e8a0a8 upstream. The print format error was found when using ftrace event: <...>-1406 [000] .... 23599442.895823: jbd2_end_commit: dev 252,8 transaction -1866216965 sync 0 head -1866217368 <...>-1406 [000] .... 23599442.896299: jbd2_start_commit: dev 252,8 transaction -1866216964 sync 0 Use the correct print format for transaction, head and tid. 
Fixes: 879c5e6b7cb4 ('jbd2: convert instrumentation from markers to tracepoints') Signed-off-by: Bixuan Cui Reviewed-by: Jason Yan Link: https://lore.kernel.org/r/1665488024-95172-1-git-send-email-cuibixuan@linux.alibaba.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/trace/events/jbd2.h | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 99f783c384bb..8f5ee380d309 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -40,7 +40,7 @@ DECLARE_EVENT_CLASS(jbd2_commit, TP_STRUCT__entry( __field( dev_t, dev ) __field( char, sync_commit ) - __field( int, transaction ) + __field( tid_t, transaction ) ), TP_fast_assign( @@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit, __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %d,%d transaction %d sync %d", + TP_printk("dev %d,%d transaction %u sync %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->transaction, __entry->sync_commit) ); @@ -97,8 +97,8 @@ TRACE_EVENT(jbd2_end_commit, TP_STRUCT__entry( __field( dev_t, dev ) __field( char, sync_commit ) - __field( int, transaction ) - __field( int, head ) + __field( tid_t, transaction ) + __field( tid_t, head ) ), TP_fast_assign( @@ -108,7 +108,7 @@ TRACE_EVENT(jbd2_end_commit, __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %d,%d transaction %d sync %d head %d", + TP_printk("dev %d,%d transaction %u sync %d head %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->transaction, __entry->sync_commit, __entry->head) ); @@ -134,14 +134,14 @@ TRACE_EVENT(jbd2_submit_inode_data, ); DECLARE_EVENT_CLASS(jbd2_handle_start_class, - TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + TP_PROTO(dev_t dev, tid_t tid, unsigned int type, unsigned int line_no, int requested_blocks), TP_ARGS(dev, tid, type, line_no, requested_blocks), TP_STRUCT__entry( 
__field( dev_t, dev ) - __field( unsigned long, tid ) + __field( tid_t, tid ) __field( unsigned int, type ) __field( unsigned int, line_no ) __field( int, requested_blocks) @@ -155,28 +155,28 @@ DECLARE_EVENT_CLASS(jbd2_handle_start_class, __entry->requested_blocks = requested_blocks; ), - TP_printk("dev %d,%d tid %lu type %u line_no %u " + TP_printk("dev %d,%d tid %u type %u line_no %u " "requested_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, __entry->type, __entry->line_no, __entry->requested_blocks) ); DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start, - TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + TP_PROTO(dev_t dev, tid_t tid, unsigned int type, unsigned int line_no, int requested_blocks), TP_ARGS(dev, tid, type, line_no, requested_blocks) ); DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart, - TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + TP_PROTO(dev_t dev, tid_t tid, unsigned int type, unsigned int line_no, int requested_blocks), TP_ARGS(dev, tid, type, line_no, requested_blocks) ); TRACE_EVENT(jbd2_handle_extend, - TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + TP_PROTO(dev_t dev, tid_t tid, unsigned int type, unsigned int line_no, int buffer_credits, int requested_blocks), @@ -184,7 +184,7 @@ TRACE_EVENT(jbd2_handle_extend, TP_STRUCT__entry( __field( dev_t, dev ) - __field( unsigned long, tid ) + __field( tid_t, tid ) __field( unsigned int, type ) __field( unsigned int, line_no ) __field( int, buffer_credits ) @@ -200,7 +200,7 @@ TRACE_EVENT(jbd2_handle_extend, __entry->requested_blocks = requested_blocks; ), - TP_printk("dev %d,%d tid %lu type %u line_no %u " + TP_printk("dev %d,%d tid %u type %u line_no %u " "buffer_credits %d requested_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, __entry->type, __entry->line_no, __entry->buffer_credits, @@ -208,7 +208,7 @@ TRACE_EVENT(jbd2_handle_extend, ); TRACE_EVENT(jbd2_handle_stats, - TP_PROTO(dev_t dev, unsigned 
long tid, unsigned int type, + TP_PROTO(dev_t dev, tid_t tid, unsigned int type, unsigned int line_no, int interval, int sync, int requested_blocks, int dirtied_blocks), @@ -217,7 +217,7 @@ TRACE_EVENT(jbd2_handle_stats, TP_STRUCT__entry( __field( dev_t, dev ) - __field( unsigned long, tid ) + __field( tid_t, tid ) __field( unsigned int, type ) __field( unsigned int, line_no ) __field( int, interval ) @@ -237,7 +237,7 @@ TRACE_EVENT(jbd2_handle_stats, __entry->dirtied_blocks = dirtied_blocks; ), - TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d " + TP_printk("dev %d,%d tid %u type %u line_no %u interval %d " "sync %d requested_blocks %d dirtied_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, __entry->type, __entry->line_no, __entry->interval, @@ -246,14 +246,14 @@ TRACE_EVENT(jbd2_handle_stats, ); TRACE_EVENT(jbd2_run_stats, - TP_PROTO(dev_t dev, unsigned long tid, + TP_PROTO(dev_t dev, tid_t tid, struct transaction_run_stats_s *stats), TP_ARGS(dev, tid, stats), TP_STRUCT__entry( __field( dev_t, dev ) - __field( unsigned long, tid ) + __field( tid_t, tid ) __field( unsigned long, wait ) __field( unsigned long, request_delay ) __field( unsigned long, running ) @@ -279,7 +279,7 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %d,%d tid %lu wait %u request_delay %u running %u " + TP_printk("dev %d,%d tid %u wait %u request_delay %u running %u " "locked %u flushing %u logging %u handle_count %u " "blocks %u blocks_logged %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, @@ -294,14 +294,14 @@ TRACE_EVENT(jbd2_run_stats, ); TRACE_EVENT(jbd2_checkpoint_stats, - TP_PROTO(dev_t dev, unsigned long tid, + TP_PROTO(dev_t dev, tid_t tid, struct transaction_chp_stats_s *stats), TP_ARGS(dev, tid, stats), TP_STRUCT__entry( __field( dev_t, dev ) - __field( unsigned long, tid ) + __field( tid_t, tid ) __field( unsigned long, chp_time ) __field( __u32, forced_to_close ) __field( __u32, 
written ) @@ -317,7 +317,7 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->dropped = stats->cs_dropped; ), - TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " + TP_printk("dev %d,%d tid %u chp_time %u forced_to_close %u " "written %u dropped %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->chp_time), From 254f17db4352061698c1feb6f434f3b6c23321ae Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:26 +0000 Subject: [PATCH 012/207] perf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D commit efe062705d149b20a15498cb999a9edbb8241e6f upstream. Current implementation of I/O stacks to PMU mapping doesn't support ICX-D. Detect ICX-D system to disable mapping. Fixes: 10337e95e04c ("perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on ICX") Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221117122833.3103580-5-alexander.antonov@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_snbep.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 2adeaf4de4df..b363fddc2a89 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fcd95e93f479..feef6ee6e0e1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5144,6 +5144,11 @@ static int icx_iio_get_topology(struct intel_uncore_type *type) static int icx_iio_set_mapping(struct intel_uncore_type *type) { + /* Detect ICX-D system. 
This case is not supported */ + if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) { + pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group); + return -EPERM; + } return pmu_iio_set_mapping(type, &icx_iio_mapping_group); } From f42462b9cabfb02f83657ba31d8e85a425f09dd6 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Thu, 17 Nov 2022 12:28:25 +0000 Subject: [PATCH 013/207] perf/x86/intel/uncore: Clear attr_update properly commit 6532783310e2b2f50dc13f46c49aa6546cb6e7a3 upstream. The current clear_attr_update procedure in pmu_set_mapping() sets the attr_update field to NULL, which is not correct because intel_uncore_type PMU types can contain several groups in the attr_update field. For example, the SPR platform already has uncore_alias_group to update, and the UPI topology group will be added in subsequent patches. Fix the current behavior and clear only the attr_update group related to mapping. Fixes: bb42b3d39781 ("perf/x86/intel/uncore: Expose an Uncore unit to IIO PMON mapping") Signed-off-by: Alexander Antonov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221117122833.3103580-4-alexander.antonov@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore_snbep.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index feef6ee6e0e1..8f371f3cbbd2 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3804,6 +3804,21 @@ static const struct attribute_group *skx_iio_attr_update[] = { NULL, }; +static void pmu_clear_mapping_attr(const struct attribute_group **groups, + struct attribute_group *ag) +{ + int i; + + for (i = 0; groups[i]; i++) { + if (groups[i] == ag) { + for (i++; groups[i]; i++) + groups[i - 1] = groups[i]; + groups[i - 1] = NULL; + break; + } + } +} + static int pmu_iio_set_mapping(struct intel_uncore_type *type,
struct attribute_group *ag) { @@ -3852,7 +3867,7 @@ clear_attrs: clear_topology: kfree(type->topology); clear_attr_update: - type->attr_update = NULL; + pmu_clear_mapping_attr(type->attr_update, ag); return ret; } From b70d118a6cdc713267494f1e1e366664f2918c2a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 10 Oct 2022 07:44:13 -0400 Subject: [PATCH 014/207] arm64: dts: qcom: sdm845-db845c: correct SPI2 pins drive strength commit 9905370560d9c29adc15f4937c5a0c0dac05f0b4 upstream. The pin configuration (done with generic pin controller helpers and as expressed by bindings) requires children nodes with either: 1. "pins" property and the actual configuration, 2. another set of nodes with above point. The qup_spi2_default pin configuration already uses the second method with a "pinmux" child, so configure drive-strength similarly in "pinconf". Otherwise the PIN drive strength would not be applied. Fixes: 8d23a0040475 ("arm64: dts: qcom: db845c: add Low speed expansion i2c and spi nodes") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Douglas Anderson Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221010114417.29859-2-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 132417e2d11e..a3e15dedd60c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -1123,7 +1123,10 @@ /* PINCTRL - additions to nodes defined in sdm845.dtsi */ &qup_spi2_default { - drive-strength = <16>; + pinconf { + pins = "gpio27", "gpio28", "gpio29", "gpio30"; + drive-strength = <16>; + }; }; &qup_uart3_default{ From 51b84cfd24ee52953f93fda5a20f07e3aa87959e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 4 Nov 2022 10:20:44 +0100 Subject: [PATCH 015/207] 
arm64: dts: qcom: sc8280xp: fix UFS reference clocks commit f446022b932aff1d6a308ca5d537ec2b512debdc upstream. There are three UFS reference clocks on SC8280XP which are used as follows: - The GCC_UFS_REF_CLKREF_CLK clock is fed to any UFS device connected to either controller. - The GCC_UFS_1_CARD_CLKREF_CLK and GCC_UFS_CARD_CLKREF_CLK clocks provide reference clocks to the two PHYs. Note that this depends on first updating the clock driver to reflect that all three clocks are sourced from CXO. Specifically, the UFS controller driver expects the device reference clock to have a valid frequency: ufshcd-qcom 1d84000.ufs: invalid ref_clk setting = 0 Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Fixes: 8d6b458ce6e9 ("arm64: dts: qcom: sc8280xp: fix ufs_card_phy ref clock") Fixes: f3aa975e230e ("arm64: dts: qcom: sc8280xp: correct ref clock for ufs_mem_phy") Link: https://lore.kernel.org/lkml/Y2OEjNAPXg5BfOxH@hovoldconsulting.com/ Cc: stable@vger.kernel.org # 5.20 Signed-off-by: Johan Hovold Reviewed-by: Brian Masney Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221104092045.17410-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 510f6d60b45b..9f2a136d5cbc 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -861,7 +861,7 @@ <&gcc GCC_AGGRE_UFS_PHY_AXI_CLK>, <&gcc GCC_UFS_PHY_AHB_CLK>, <&gcc GCC_UFS_PHY_UNIPRO_CORE_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_TX_SYMBOL_0_CLK>, <&gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>, <&gcc GCC_UFS_PHY_RX_SYMBOL_1_CLK>; @@ -892,7 +892,7 @@ ranges; clock-names = "ref", "ref_aux"; - clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, + clocks = <&gcc GCC_UFS_CARD_CLKREF_CLK>, <&gcc 
GCC_UFS_PHY_PHY_AUX_CLK>; resets = <&ufs_mem_hc 0>; @@ -930,7 +930,7 @@ <&gcc GCC_AGGRE_UFS_CARD_AXI_CLK>, <&gcc GCC_UFS_CARD_AHB_CLK>, <&gcc GCC_UFS_CARD_UNIPRO_CORE_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_CARD_TX_SYMBOL_0_CLK>, <&gcc GCC_UFS_CARD_RX_SYMBOL_0_CLK>, <&gcc GCC_UFS_CARD_RX_SYMBOL_1_CLK>; @@ -961,7 +961,7 @@ ranges; clock-names = "ref", "ref_aux"; - clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, + clocks = <&gcc GCC_UFS_1_CARD_CLKREF_CLK>, <&gcc GCC_UFS_CARD_PHY_AUX_CLK>; resets = <&ufs_card_hc 0>; From 39eeec3130a1367f527093b36010e6605d618564 Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Wed, 7 Dec 2022 13:19:09 +0800 Subject: [PATCH 016/207] mmc: sdhci-sprd: Disable CLK_AUTO when the clock is less than 400K commit ff874dbc4f868af128b412a9bd92637103cf11d7 upstream. When the clock is less than 400K, some SD cards fail to initialize because CLK_AUTO is enabled. Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Wenchao Chen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221207051909.32126-1-wenchao.chen@unisoc.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-sprd.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index bec3f9e3cd3f..525f979e2a97 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -228,13 +228,15 @@ static inline void _sdhci_sprd_set_clock(struct sdhci_host *host, div = ((div & 0x300) >> 2) | ((div & 0xFF) << 8); sdhci_enable_clk(host, div); - /* enable auto gate sdhc_enable_auto_gate */ - val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI); - mask = SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN | - SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN; - if (mask != (val & mask)) { - val |= mask; - sdhci_writel(host, val, SDHCI_SPRD_REG_32_BUSY_POSI); + /* Enable CLK_AUTO when the clock is greater than 400K. 
*/ + if (clk > 400000) { + val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI); + mask = SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN | + SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN; + if (mask != (val & mask)) { + val |= mask; + sdhci_writel(host, val, SDHCI_SPRD_REG_32_BUSY_POSI); + } } } From 9ebb4f4eab19116619e40d0aa8fbba955512eea1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:41 +0100 Subject: [PATCH 017/207] phy: qcom-qmp-combo: fix out-of-bounds clock access commit d8a5b59c5fc75c99ba17e3eb1a8f580d8d172b28 upstream. The SM8250 only uses three clocks but the DP configuration erroneously described four clocks. In case the DP part of the PHY is initialised before the USB part, this would lead to uninitialised memory beyond the bulk-clocks array to be treated as a clock pointer as the clocks are requested based on the USB configuration. Fixes: aff188feb5e1 ("phy: qcom-qmp: add support for sm8250-usb3-dp phy") Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index ba9d761ec49a..8e46d7a66edd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1328,8 +1328,8 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_sm8250_usbphy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, From 
9d455cb89dc576f752beebcadc823511882fb321 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 5 Dec 2022 15:33:31 +0800 Subject: [PATCH 018/207] drm/amd/pm: update SMU13.0.0 reported maximum shader clock commit 7a18e089eff02f17eaee49fc18641f5d16a8284b upstream. Update the reported maximum shader clock to the value which can be guarded to be achieved on all cards. This is to align with Window setting. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index f0121d171630..dbb901ec5986 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -517,6 +517,23 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu) dpm_table); if (ret) return ret; + + /* + * Update the reported maximum shader clock to the value + * which can be guarded to be achieved on all cards. This + * is aligned with Window setting. And considering that value + * might be not the peak frequency the card can achieve, it + * is normal some real-time clock frequency can overtake this + * labelled maximum clock frequency(for example in pp_dpm_sclk + * sysfs output). 
+ */ + if (skutable->DriverReportedClocks.GameClockAc && + (dpm_table->dpm_levels[dpm_table->count - 1].value > + skutable->DriverReportedClocks.GameClockAc)) { + dpm_table->dpm_levels[dpm_table->count - 1].value = + skutable->DriverReportedClocks.GameClockAc; + dpm_table->max = skutable->DriverReportedClocks.GameClockAc; + } } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; @@ -779,6 +796,57 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu, return ret; } +static int smu_v13_0_0_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + /* uclk dpm table */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + /* gfxclk dpm table */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + /* socclk dpm table */ + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + /* fclk dpm table */ + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + case SMU_VCLK1: + /* vclk dpm table */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + case SMU_DCLK1: + /* dclk dpm table */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + dev_err(smu->adev->dev, "Unsupported clock type!\n"); + return -EINVAL; + } + + if (min) + *min = dpm_table->min; + if (max) + *max = dpm_table->max; + + return 0; +} + static int smu_v13_0_0_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, @@ -1813,7 +1881,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_enabled_mask = smu_cmn_get_enabled_mask, .dpm_set_vcn_enable = smu_v13_0_set_vcn_enable, .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable, - 
.get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = smu_v13_0_0_get_dpm_ultimate_freq, .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, .read_sensor = smu_v13_0_0_read_sensor, .feature_is_enabled = smu_cmn_feature_is_enabled, From ed00567b170fc461d8755003e5cf4e0b008aff90 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 5 Dec 2022 14:53:34 +0800 Subject: [PATCH 019/207] drm/amd/pm: correct SMU13.0.0 pstate profiling clock settings commit 32a7819ff8e25375c7515aaae5cfcb8c44a461b7 upstream. Correct the pstate standard/peak profiling mode clock settings for SMU13.0.0. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index dbb901ec5986..bf745a7e67d3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1349,9 +1349,17 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.fclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + DriverReportedClocks_t driver_clocks = + pptable->SkuTable.DriverReportedClocks; pstate_table->gfxclk_pstate.min = gfx_table->min; - pstate_table->gfxclk_pstate.peak = gfx_table->max; + if (driver_clocks.GameClockAc && + (driver_clocks.GameClockAc < gfx_table->max)) + pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc; + else + pstate_table->gfxclk_pstate.peak = gfx_table->max; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; @@ -1368,12 +1376,12 @@ static 
int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu) pstate_table->fclk_pstate.min = fclk_table->min; pstate_table->fclk_pstate.peak = fclk_table->max; - /* - * For now, just use the mininum clock frequency. - * TODO: update them when the real pstate settings available - */ - pstate_table->gfxclk_pstate.standard = gfx_table->min; - pstate_table->uclk_pstate.standard = mem_table->min; + if (driver_clocks.BaseClockAc && + driver_clocks.BaseClockAc < gfx_table->max) + pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc; + else + pstate_table->gfxclk_pstate.standard = gfx_table->max; + pstate_table->uclk_pstate.standard = mem_table->max; pstate_table->socclk_pstate.standard = soc_table->min; pstate_table->vclk_pstate.standard = vclk_table->min; pstate_table->dclk_pstate.standard = dclk_table->min; From 712b0938166a68b7685ef7ce911a52051ba21329 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 18 Nov 2022 15:06:09 -0500 Subject: [PATCH 020/207] btrfs: fix uninitialized parent in insert_state commit d7c9e1be2876f63fb2178a24e0c1d5733ff98d47 upstream. I don't know how this isn't caught when we build this in the kernel, but while syncing extent-io-tree.c into btrfs-progs I got an error because parent could potentially be uninitialized when we link in a new node, specifically when the extent_io_tree is empty. This means we could have garbage in the parent color. I don't know what the ramifications are of that, but it's probably not great, so fix this by initializing parent to NULL. I spot checked all of our other usages in btrfs and we appear to be doing the correct thing everywhere else. 
Fixes: c7e118cf98c7 ("btrfs: open code rbtree search in insert_state") CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-io-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 3676580c2d97..7b93719a486c 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -397,7 +397,7 @@ static int insert_state(struct extent_io_tree *tree, u32 bits, struct extent_changeset *changeset) { struct rb_node **node; - struct rb_node *parent; + struct rb_node *parent = NULL; const u64 end = state->end; set_state_bits(tree, state, bits, changeset); From 169a4cf46882974d4db6d85eb623ec898e51bbc0 Mon Sep 17 00:00:00 2001 From: void0red Date: Wed, 23 Nov 2022 22:39:45 +0800 Subject: [PATCH 021/207] btrfs: fix extent map use-after-free when handling missing device in read_one_chunk commit 1742e1c90c3da344f3bb9b1f1309b3f47482756a upstream. Store the error code before freeing the extent_map. Though it's reference counted structure, in that function it's the first and last allocation so this would lead to a potential use-after-free. The error can happen eg. when chunk is stored on a missing device and the degraded mount option is missing. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216721 Reported-by: eriri <1527030098@qq.com> Fixes: adfb69af7d8c ("btrfs: add_missing_dev() should return the actual error") CC: stable@vger.kernel.org # 4.9+ Signed-off-by: void0red Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 635f45f1a2ef..dba087ad40ea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7241,8 +7241,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, map->stripes[i].dev = handle_missing_device(fs_info, devid, uuid); if (IS_ERR(map->stripes[i].dev)) { + ret = PTR_ERR(map->stripes[i].dev); free_extent_map(em); - return PTR_ERR(map->stripes[i].dev); + return ret; } } From a94b90ac1f251d1007c0c43ee289a61b50f2505f Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 14 Dec 2022 15:05:08 -0800 Subject: [PATCH 022/207] btrfs: fix resolving backrefs for inline extent followed by prealloc commit 560840afc3e63bbe5d9c5ef6b2ecf8f3589adff6 upstream. If a file consists of an inline extent followed by a regular or prealloc extent, then a legitimate attempt to resolve a logical address in the non-inline region will result in add_all_parents reading the invalid offset field of the inline extent. If the inline extent item is placed in the leaf eb s.t. 
it is the first item, attempting to access the offset field will not only be meaningless, it will go past the end of the eb and cause this panic: [17.626048] BTRFS warning (device dm-2): bad eb member end: ptr 0x3fd4 start 30834688 member offset 16377 size 8 [17.631693] general protection fault, probably for non-canonical address 0x5088000000000: 0000 [#1] SMP PTI [17.635041] CPU: 2 PID: 1267 Comm: btrfs Not tainted 5.12.0-07246-g75175d5adc74-dirty #199 [17.637969] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [17.641995] RIP: 0010:btrfs_get_64+0xe7/0x110 [17.649890] RSP: 0018:ffffc90001f73a08 EFLAGS: 00010202 [17.651652] RAX: 0000000000000001 RBX: ffff88810c42d000 RCX: 0000000000000000 [17.653921] RDX: 0005088000000000 RSI: ffffc90001f73a0f RDI: 0000000000000001 [17.656174] RBP: 0000000000000ff9 R08: 0000000000000007 R09: c0000000fffeffff [17.658441] R10: ffffc90001f73790 R11: ffffc90001f73788 R12: ffff888106afe918 [17.661070] R13: 0000000000003fd4 R14: 0000000000003f6f R15: cdcdcdcdcdcdcdcd [17.663617] FS: 00007f64e7627d80(0000) GS:ffff888237c80000(0000) knlGS:0000000000000000 [17.666525] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17.668664] CR2: 000055d4a39152e8 CR3: 000000010c596002 CR4: 0000000000770ee0 [17.671253] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17.673634] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17.676034] PKRU: 55555554 [17.677004] Call Trace: [17.677877] add_all_parents+0x276/0x480 [17.679325] find_parent_nodes+0xfae/0x1590 [17.680771] btrfs_find_all_leafs+0x5e/0xa0 [17.682217] iterate_extent_inodes+0xce/0x260 [17.683809] ? btrfs_inode_flags_to_xflags+0x50/0x50 [17.685597] ? iterate_inodes_from_logical+0xa1/0xd0 [17.687404] iterate_inodes_from_logical+0xa1/0xd0 [17.689121] ? btrfs_inode_flags_to_xflags+0x50/0x50 [17.691010] btrfs_ioctl_logical_to_ino+0x131/0x190 [17.692946] btrfs_ioctl+0x104a/0x2f60 [17.694384] ? 
selinux_file_ioctl+0x182/0x220 [17.695995] ? __x64_sys_ioctl+0x84/0xc0 [17.697394] __x64_sys_ioctl+0x84/0xc0 [17.698697] do_syscall_64+0x33/0x40 [17.700017] entry_SYSCALL_64_after_hwframe+0x44/0xae [17.701753] RIP: 0033:0x7f64e72761b7 [17.709355] RSP: 002b:00007ffefb067f58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [17.712088] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f64e72761b7 [17.714667] RDX: 00007ffefb067fb0 RSI: 00000000c0389424 RDI: 0000000000000003 [17.717386] RBP: 00007ffefb06d188 R08: 000055d4a390d2b0 R09: 00007f64e7340a60 [17.719938] R10: 0000000000000231 R11: 0000000000000246 R12: 0000000000000001 [17.722383] R13: 0000000000000000 R14: 00000000c0389424 R15: 000055d4a38fd2a0 [17.724839] Modules linked in: Fix the bug by detecting the inline extent item in add_all_parents and skipping to the next extent item. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/backref.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 18374a6d05bd..18cf801ab590 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -433,6 +433,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, u64 wanted_disk_byte = ref->wanted_disk_byte; u64 count = 0; u64 data_offset; + u8 type; if (level != 0) { eb = path->nodes[level]; @@ -487,6 +488,9 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, continue; } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + type = btrfs_file_extent_type(eb, fi); + if (type == BTRFS_FILE_EXTENT_INLINE) + goto next; disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); data_offset = btrfs_file_extent_offset(eb, fi); From bacb7e1d42f4a48067d1ee579b8bf4dc133b8adc Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Tue, 8 Nov 2022 13:37:55 +0100 Subject: [PATCH 023/207] ARM: ux500: do not directly dereference __iomem commit 65b0e307a1a9193571db12910f382f84195a3d29 upstream. Sparse reports that calling add_device_randomness() on `uid` is a violation of address spaces. And indeed the next usage uses readl() properly, but that was left out when passing it to add_device_randomness(). So instead copy the whole thing to the stack first. Fixes: 4040d10a3d44 ("ARM: ux500: add DB serial number to entropy pool") Cc: Linus Walleij Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/202210230819.loF90KDh-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20221108123755.207438-1-Jason@zx2c4.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/soc/ux500/ux500-soc-id.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/soc/ux500/ux500-soc-id.c b/drivers/soc/ux500/ux500-soc-id.c index a9472e0e5d61..27d6e25a0115 100644 --- a/drivers/soc/ux500/ux500-soc-id.c +++ b/drivers/soc/ux500/ux500-soc-id.c @@ -167,20 +167,18 @@ ATTRIBUTE_GROUPS(ux500_soc); static const char *db8500_read_soc_id(struct device_node *backupram) { void __iomem *base; - void __iomem *uid; const char *retstr; + u32 uid[5]; base = of_iomap(backupram, 0); if (!base) return NULL; - uid = base + 0x1fc0; + memcpy_fromio(uid, base + 0x1fc0, sizeof(uid)); /* Throw these device-specific numbers into the entropy pool */ - add_device_randomness(uid, 0x14); + add_device_randomness(uid, sizeof(uid)); retstr = kasprintf(GFP_KERNEL, "%08x%08x%08x%08x%08x", - readl((u32 *)uid+0), - readl((u32 *)uid+1), readl((u32 *)uid+2), - readl((u32 *)uid+3), readl((u32 *)uid+4)); + uid[0], uid[1], uid[2], uid[3], uid[4]); iounmap(base); return retstr; } From 0f9327484c6ff078c281c7b1c094ade43eb84eb0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 30 Sep 2022 21:20:38 +0200 Subject: [PATCH 024/207] 
arm64: dts: qcom: sdm850-samsung-w737: correct I2C12 pins drive strength commit 3638ea010c37e1e6d93474c4b3368f403600413f upstream. The pin configuration (done with generic pin controller helpers and as expressed by bindings) requires children nodes with either: 1. "pins" property and the actual configuration, 2. another set of nodes with above point. The qup_i2c12_default pin configuration used second method - with a "pinmux" child. Fixes: d4b341269efb ("arm64: dts: qcom: Add support for Samsung Galaxy Book2") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220930192039.240486-2-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts b/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts index f954fe5cb61a..d028a7eb364a 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-samsung-w737.dts @@ -415,8 +415,10 @@ }; &qup_i2c12_default { - drive-strength = <2>; - bias-disable; + pinmux { + drive-strength = <2>; + bias-disable; + }; }; &qup_uart6_default { From 346ac4a116cbad784f95ef9a1ab195dbe19230b0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 8 Oct 2022 20:42:54 -0600 Subject: [PATCH 025/207] random: use rejection sampling for uniform bounded random integers commit e9a688bcb19348862afe30d7c85bc37c4c293471 upstream. Until the very recent commits, many bounded random integers were calculated using `get_random_u32() % max_plus_one`, which not only incurs the price of a division -- indicating performance mostly was not a real issue -- but also does not result in a uniformly distributed output if max_plus_one is not a power of two. 
Recent commits moved to using `prandom_u32_max(max_plus_one)`, which replaces the division with a faster multiplication, but still does not solve the issue with non-uniform output. For some users, maybe this isn't a problem, and for others, maybe it is, but for the majority of users, probably the question has never been posed and analyzed, and nobody thought much about it, probably assuming random is random is random. In other words, the unthinking expectation of most users is likely that the resultant numbers are uniform. So we implement here an efficient way of generating uniform bounded random integers. Through use of compile-time evaluation, and avoiding divisions as much as possible, this commit introduces no measurable overhead. At least for hot-path uses tested, any potential difference was lost in the noise. On both clang and gcc, code generation is pretty small. The new function, get_random_u32_below(), lives in random.h, rather than prandom.h, and has a "get_random_xxx" function name, because it is suitable for all uses, including cryptography. In order to be efficient, we implement a kernel-specific variant of Daniel Lemire's algorithm from "Fast Random Integer Generation in an Interval", linked below. The kernel's variant takes advantage of constant folding to avoid divisions entirely in the vast majority of cases, works on both 32-bit and 64-bit architectures, and requests a minimal amount of bytes from the RNG. Link: https://arxiv.org/pdf/1805.10941.pdf Cc: stable@vger.kernel.org # to ease future backports that use this api Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 22 ++++++++++++++++++++++ include/linux/prandom.h | 18 ++---------------- include/linux/random.h | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 69754155300e..6f323344d0b9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -160,6 +160,7 @@ EXPORT_SYMBOL(wait_for_random_bytes); * u8 get_random_u8() * u16 get_random_u16() * u32 get_random_u32() + * u32 get_random_u32_below(u32 ceil) * u64 get_random_u64() * unsigned long get_random_long() * @@ -510,6 +511,27 @@ DEFINE_BATCHED_ENTROPY(u16) DEFINE_BATCHED_ENTROPY(u32) DEFINE_BATCHED_ENTROPY(u64) +u32 __get_random_u32_below(u32 ceil) +{ + /* + * This is the slow path for variable ceil. It is still fast, most of + * the time, by doing traditional reciprocal multiplication and + * opportunistically comparing the lower half to ceil itself, before + * falling back to computing a larger bound, and then rejecting samples + * whose lower half would indicate a range indivisible by ceil. The use + * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable + * in 32-bits. 
+ */ + u64 mult = (u64)ceil * get_random_u32(); + if (unlikely((u32)mult < ceil)) { + u32 bound = -ceil % ceil; + while (unlikely((u32)mult < bound)) + mult = (u64)ceil * get_random_u32(); + } + return mult >> 32; +} +EXPORT_SYMBOL(__get_random_u32_below); + #ifdef CONFIG_SMP /* * This function is called when the CPU is coming up, with entry diff --git a/include/linux/prandom.h b/include/linux/prandom.h index e0a0759dd09c..1f4a0de7b019 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -23,24 +23,10 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); #define prandom_init_once(pcpu_state) \ DO_ONCE(prandom_seed_full_state, (pcpu_state)) -/** - * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) - * @ep_ro: right open interval endpoint - * - * Returns a pseudo-random number that is in interval [0, ep_ro). This is - * useful when requesting a random index of an array containing ep_ro elements, - * for example. The result is somewhat biased when ep_ro is not a power of 2, - * so do not use this for cryptographic purposes. - * - * Returns: pseudo-random number in interval [0, ep_ro) - */ +/* Deprecated: use get_random_u32_below() instead. */ static inline u32 prandom_u32_max(u32 ep_ro) { - if (__builtin_constant_p(ep_ro <= 1U << 8) && ep_ro <= 1U << 8) - return (get_random_u8() * ep_ro) >> 8; - if (__builtin_constant_p(ep_ro <= 1U << 16) && ep_ro <= 1U << 16) - return (get_random_u16() * ep_ro) >> 16; - return ((u64)get_random_u32() * ep_ro) >> 32; + return get_random_u32_below(ep_ro); } /* diff --git a/include/linux/random.h b/include/linux/random.h index 147a5e0d0b8e..3a82c0a8bc46 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -51,6 +51,46 @@ static inline unsigned long get_random_long(void) #endif } +u32 __get_random_u32_below(u32 ceil); + +/* + * Returns a random integer in the interval [0, ceil), with uniform + * distribution, suitable for all uses. 
Fastest when ceil is a constant, but + * still fast for variable ceil as well. + */ +static inline u32 get_random_u32_below(u32 ceil) +{ + if (!__builtin_constant_p(ceil)) + return __get_random_u32_below(ceil); + + /* + * For the fast path, below, all operations on ceil are precomputed by + * the compiler, so this incurs no overhead for checking pow2, doing + * divisions, or branching based on integer size. The resultant + * algorithm does traditional reciprocal multiplication (typically + * optimized by the compiler into shifts and adds), rejecting samples + * whose lower half would indicate a range indivisible by ceil. + */ + BUILD_BUG_ON_MSG(!ceil, "get_random_u32_below() must take ceil > 0"); + if (ceil <= 1) + return 0; + for (;;) { + if (ceil <= 1U << 8) { + u32 mult = ceil * get_random_u8(); + if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil)) + return mult >> 8; + } else if (ceil <= 1U << 16) { + u32 mult = ceil * get_random_u16(); + if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil)) + return mult >> 16; + } else { + u64 mult = (u64)ceil * get_random_u32(); + if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil)) + return mult >> 32; + } + } +} + /* * On 64-bit architectures, protect against non-terminated C string overflows * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. From 38f99d0b028473e975336932f6feef99c7e3956a Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 10 Aug 2022 23:19:09 +0100 Subject: [PATCH 026/207] x86/fpu/xstate: Fix XSTATE_WARN_ON() to emit relevant diagnostics commit 48280042f2c6e3ac2cfb1d8b752ab4a7e0baea24 upstream. "XSAVE consistency problem" has been reported under Xen, but that's the extent of my divination skills. Modify XSTATE_WARN_ON() to force the caller to provide relevant diagnostic information, and modify each caller suitably. For check_xstate_against_struct(), this removes a double WARN() where one will do perfectly fine. 
CC stable as this has been wonky debugging for 7 years and it is good to have there too. Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220810221909.12768-1-andrew.cooper3@citrix.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/xstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 59e543b95a3c..c2dde46a538e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -440,8 +440,8 @@ static void __init __xstate_dump_leaves(void) } } -#define XSTATE_WARN_ON(x) do { \ - if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) { \ +#define XSTATE_WARN_ON(x, fmt, ...) do { \ + if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \ __xstate_dump_leaves(); \ } \ } while (0) @@ -554,8 +554,7 @@ static bool __init check_xstate_against_struct(int nr) (nr >= XFEATURE_MAX) || (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) || ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) { - WARN_ONCE(1, "no structure for xstate: %d\n", nr); - XSTATE_WARN_ON(1); + XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr); return false; } return true; @@ -598,12 +597,13 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) * XSAVES. 
*/ if (!xsaves && xfeature_is_supervisor(i)) { - XSTATE_WARN_ON(1); + XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i); return false; } } size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted); - XSTATE_WARN_ON(size != kernel_size); + XSTATE_WARN_ON(size != kernel_size, + "size %u != kernel_size %u\n", size, kernel_size); return size == kernel_size; } From 833cdf903d2eed6a34ac1ffdc76d40534d0afccf Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 30 Sep 2022 21:20:37 +0200 Subject: [PATCH 027/207] arm64: dts: qcom: sdm850-lenovo-yoga-c630: correct I2C12 pins drive strength commit fd49776d8f458bba5499384131eddc0b8bcaf50c upstream. The pin configuration (done with generic pin controller helpers and as expressed by bindings) requires children nodes with either: 1. "pins" property and the actual configuration, 2. another set of nodes with above point. The qup_i2c12_default pin configuration used second method - with a "pinmux" child. Fixes: 44acee207844 ("arm64: dts: qcom: Add Lenovo Yoga C630") Cc: Signed-off-by: Krzysztof Kozlowski Tested-by: Steev Klimaszewski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220930192039.240486-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index be59a8ba9c1f..74f43da51fa5 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -487,8 +487,10 @@ }; &qup_i2c12_default { - drive-strength = <2>; - bias-disable; + pinmux { + drive-strength = <2>; + bias-disable; + }; }; &qup_uart6_default { From b8b9b0b857b948e52b6d871d47ab9ef74d0cbee7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Dec 2022 14:03:24 -0800 
Subject: [PATCH 028/207] cxl/region: Fix missing probe failure commit bf3e5da8cb43a671b32fc125fa81b8f6a3677192 upstream. cxl_region_probe() allows for regions not in the 'commit' state to be enabled. Fail probe when the region is not committed otherwise the kernel may indicate that an address range is active when none of the decoders are active. Fixes: 8d48817df6ac ("cxl/region: Add region driver boiler plate") Cc: Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/166993220462.1995348.1698008475198427361.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c7152b4bd9eb..c4f32c32dfd5 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1923,6 +1923,9 @@ static int cxl_region_probe(struct device *dev) */ up_read(&cxl_region_rwsem); + if (rc) + return rc; + switch (cxlr->mode) { case CXL_DECODER_PMEM: return devm_cxl_add_pmem_region(cxlr); From ea27cc32450e871f8931c021311558d8f5b11fde Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 18 Oct 2022 10:36:30 -0500 Subject: [PATCH 029/207] EDAC/mc_sysfs: Increase legacy channel support to 12 commit 25836ce1df827cb4830291cb2325067efb46753a upstream. Newer AMD systems, such as Genoa, can support up to 12 channels per EDAC "mc" device. These are detected by the device's EDAC module, and the current EDAC interface is properly enumerated. However, the legacy EDAC sysfs interface provides device attributes only for channels 0 to 7. Therefore, channels 8 to 11 will not be visible in the legacy interface. This was overlooked in the initial support for AMD Genoa. Add additional device attributes so that up to 12 channels are visible in the legacy EDAC sysfs interface. 
Fixes: e2be5955a886 ("EDAC/amd64: Add support for AMD Family 19h Models 10h-1Fh and A0h-AFh") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20221018153630.14664-1-yazen.ghannam@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_mc_sysfs.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0a638c97702a..15f63452a9be 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -298,6 +298,14 @@ DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 6); DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 7); +DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 8); +DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 9); +DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 10); +DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 11); /* Total possible dynamic DIMM Label attribute file table */ static struct attribute *dynamic_csrow_dimm_attr[] = { @@ -309,6 +317,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = { &dev_attr_legacy_ch5_dimm_label.attr.attr, &dev_attr_legacy_ch6_dimm_label.attr.attr, &dev_attr_legacy_ch7_dimm_label.attr.attr, + &dev_attr_legacy_ch8_dimm_label.attr.attr, + &dev_attr_legacy_ch9_dimm_label.attr.attr, + &dev_attr_legacy_ch10_dimm_label.attr.attr, + &dev_attr_legacy_ch11_dimm_label.attr.attr, NULL }; @@ -329,6 +341,14 @@ DEVICE_CHANNEL(ch6_ce_count, S_IRUGO, channel_ce_count_show, NULL, 6); DEVICE_CHANNEL(ch7_ce_count, S_IRUGO, channel_ce_count_show, NULL, 7); +DEVICE_CHANNEL(ch8_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 8); +DEVICE_CHANNEL(ch9_ce_count, 
S_IRUGO, + channel_ce_count_show, NULL, 9); +DEVICE_CHANNEL(ch10_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 10); +DEVICE_CHANNEL(ch11_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 11); /* Total possible dynamic ce_count attribute file table */ static struct attribute *dynamic_csrow_ce_count_attr[] = { @@ -340,6 +360,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = { &dev_attr_legacy_ch5_ce_count.attr.attr, &dev_attr_legacy_ch6_ce_count.attr.attr, &dev_attr_legacy_ch7_ce_count.attr.attr, + &dev_attr_legacy_ch8_ce_count.attr.attr, + &dev_attr_legacy_ch9_ce_count.attr.attr, + &dev_attr_legacy_ch10_ce_count.attr.attr, + &dev_attr_legacy_ch11_ce_count.attr.attr, NULL }; From 9edfbb64ca2b3f88143534c704194699dbadfdb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 9 Sep 2022 12:39:01 +0200 Subject: [PATCH 030/207] selftests: Use optional USERCFLAGS and USERLDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit de3ee3f63400a23954e7c1ad1cb8c20f29ab6fe3 upstream. This change enables to extend CFLAGS and LDFLAGS from command line, e.g. to extend compiler checks: make USERCFLAGS=-Werror USERLDFLAGS=-static USERCFLAGS and USERLDFLAGS are documented in Documentation/kbuild/makefiles.rst and Documentation/kbuild/kbuild.rst This should be backported (down to 5.10) to improve previous kernel versions testing as well. 
Cc: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20220909103901.1503436-1-mic@digikod.net Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/lib.mk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a3ea3d4a206d..291144c284fb 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -123,6 +123,11 @@ endef clean: $(CLEAN) +# Enables to extend CFLAGS and LDFLAGS from command line, e.g. +# make USERCFLAGS=-Werror USERLDFLAGS=-static +CFLAGS += $(USERCFLAGS) +LDFLAGS += $(USERLDFLAGS) + # When make O= with kselftest target from main level # the following aren't defined. # From ef3ad7f447a2a8c156e40ea284a8985372be4254 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 21 Jun 2022 15:59:43 +0000 Subject: [PATCH 031/207] x86/MCE/AMD: Clear DFR errors found in THR handler commit bc1b705b0eee4c645ad8b3bbff3c8a66e9688362 upstream. AMD's MCA Thresholding feature counts errors of all severity levels, not just correctable errors. If a deferred error causes the threshold limit to be reached (it was the error that caused the overflow), then both a deferred error interrupt and a thresholding interrupt will be triggered. The order of the interrupts is not guaranteed. If the threshold interrupt handler is executed first, then it will clear MCA_STATUS for the error. It will not check or clear MCA_DESTAT which also holds a copy of the deferred error. When the deferred error interrupt handler runs it will not find an error in MCA_STATUS, but it will find the error in MCA_DESTAT. This will cause two errors to be logged. Check for deferred errors when handling a threshold interrupt. If a bank contains a deferred error, then clear the bank's MCA_DESTAT register. Define a new helper function to do the deferred error check and clearing of MCA_DESTAT. 
[ bp: Simplify, convert comment to passive voice. ] Fixes: 37d43acfd79f ("x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220621155943.33623-1-yazen.ghannam@amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mce/amd.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 1c87501e0fa3..10fb5b5c9efa 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -788,6 +788,24 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) return status & MCI_STATUS_DEFERRED; } +static bool _log_error_deferred(unsigned int bank, u32 misc) +{ + if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), + mca_msr_reg(bank, MCA_ADDR), misc)) + return false; + + /* + * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. + * Return true here to avoid accessing these registers. + */ + if (!mce_flags.smca) + return true; + + /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */ + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); + return true; +} + /* * We have three scenarios for checking for Deferred errors: * @@ -799,20 +817,9 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) */ static void log_error_deferred(unsigned int bank) { - bool defrd; - - defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), - mca_msr_reg(bank, MCA_ADDR), 0); - - if (!mce_flags.smca) + if (_log_error_deferred(bank, 0)) return; - /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */ - if (defrd) { - wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); - return; - } - /* * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check * for a valid error. 
@@ -832,7 +839,7 @@ static void amd_deferred_error_interrupt(void) static void log_error_thresholding(unsigned int bank, u64 misc) { - _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc); + _log_error_deferred(bank, misc); } static void log_and_reset_block(struct threshold_block *block) From 6088d8783f7b656dff34392532f94ae45fb2605d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 19 Oct 2022 23:19:35 -0600 Subject: [PATCH 032/207] random: add helpers for random numbers with given floor or range commit 7f576b2593a978451416424e75f69ad1e3ae4efe upstream. Now that we have get_random_u32_below(), it's nearly trivial to make inline helpers to compute get_random_u32_above() and get_random_u32_inclusive(), which will help clean up open coded loops and manual computations throughout the tree. One snag is that in order to make get_random_u32_inclusive() operate on closed intervals, we have to do some (unlikely) special case handling if get_random_u32_inclusive(0, U32_MAX) is called. The least expensive way of doing this is actually to adjust the slowpath of get_random_u32_below() to have its undefined 0 result just return the output of get_random_u32(). We can make this basically free by calling get_random_u32() before the branch, so that the branch latency gets interleaved. Cc: stable@vger.kernel.org # to ease future backports that use this api Reviewed-by: Kees Cook Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 18 +++++++++++++++++- include/linux/random.h | 25 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 6f323344d0b9..f5868dddbb61 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -161,6 +161,8 @@ EXPORT_SYMBOL(wait_for_random_bytes); * u16 get_random_u16() * u32 get_random_u32() * u32 get_random_u32_below(u32 ceil) + * u32 get_random_u32_above(u32 floor) + * u32 get_random_u32_inclusive(u32 floor, u32 ceil) * u64 get_random_u64() * unsigned long get_random_long() * @@ -522,7 +524,21 @@ u32 __get_random_u32_below(u32 ceil) * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable * in 32-bits. */ - u64 mult = (u64)ceil * get_random_u32(); + u32 rand = get_random_u32(); + u64 mult; + + /* + * This function is technically undefined for ceil == 0, and in fact + * for the non-underscored constant version in the header, we build bug + * on that. But for the non-constant case, it's convenient to have that + * evaluate to being a straight call to get_random_u32(), so that + * get_random_u32_inclusive() can work over its whole range without + * undefined behavior. + */ + if (unlikely(!ceil)) + return rand; + + mult = (u64)ceil * rand; if (unlikely((u32)mult < ceil)) { u32 bound = -ceil % ceil; while (unlikely((u32)mult < bound)) diff --git a/include/linux/random.h b/include/linux/random.h index 3a82c0a8bc46..bd954ecbef90 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -91,6 +91,31 @@ static inline u32 get_random_u32_below(u32 ceil) } } +/* + * Returns a random integer in the interval (floor, U32_MAX], with uniform + * distribution, suitable for all uses. Fastest when floor is a constant, but + * still fast for variable floor as well. 
+ */ +static inline u32 get_random_u32_above(u32 floor) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && floor == U32_MAX, + "get_random_u32_above() must take floor < U32_MAX"); + return floor + 1 + get_random_u32_below(U32_MAX - floor); +} + +/* + * Returns a random integer in the interval [floor, ceil], with uniform + * distribution, suitable for all uses. Fastest when floor and ceil are + * constant, but still fast for variable floor and ceil as well. + */ +static inline u32 get_random_u32_inclusive(u32 floor, u32 ceil) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && __builtin_constant_p(ceil) && + (floor > ceil || ceil - floor == U32_MAX), + "get_random_u32_inclusive() must take floor <= ceil"); + return floor + get_random_u32_below(ceil - floor + 1); +} + /* * On 64-bit architectures, protect against non-terminated C string overflows * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. From 5052a96b21049b99534f82aee982769e2c1f2d5f Mon Sep 17 00:00:00 2001 From: Kant Fan Date: Tue, 25 Oct 2022 15:21:09 +0800 Subject: [PATCH 033/207] PM/devfreq: governor: Add a private governor_data for governor commit 5fdded8448924e3631d466eea499b11606c43640 upstream. The member void *data in the structure devfreq can be overwritten by governor_userspace. For example: 1. The device driver assigned the devfreq governor to simple_ondemand by the function devfreq_add_device() and initialized the devfreq member void *data to a pointer of a static structure devfreq_simple_ondemand_data by the function devfreq_add_device(). 2. The user changed the devfreq governor to userspace by the command "echo userspace > /sys/class/devfreq/.../governor". 3. The governor userspace allocated dynamic memory for the struct userspace_data and assigned the member void *data of devfreq to this memory by the function userspace_init(). 4. The user changed the devfreq governor back to simple_ondemand by the command "echo simple_ondemand > /sys/class/devfreq/.../governor". 5. 
The governor userspace exited and assigned the member void *data in the structure devfreq to NULL by the function userspace_exit(). 6. The governor simple_ondemand fetched the static information of devfreq_simple_ondemand_data in the function devfreq_simple_ondemand_func() but the member void *data of devfreq was assigned to NULL by the function userspace_exit(). 7. The information of upthreshold and downdifferential is lost and the governor simple_ondemand can't work correctly. The member void *data in the structure devfreq is designed for a static pointer used in a governor and initialized by the function devfreq_add_device(). This patch adds an element named governor_data in the devfreq structure which can be used by a governor (e.g. userspace) that wants to assign private data for its own use. Fixes: ce26c5bb9569 ("PM / devfreq: Add basic governors") Cc: stable@vger.kernel.org # 5.10+ Reviewed-by: Chanwoo Choi Acked-by: MyungJoo Ham Signed-off-by: Kant Fan Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 6 ++---- drivers/devfreq/governor_userspace.c | 12 ++++++------ include/linux/devfreq.h | 7 ++++--- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 63347a5ae599..8c5f6f7fca11 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -776,8 +776,7 @@ static void remove_sysfs_files(struct devfreq *devfreq, * @dev: the device to add devfreq feature. * @profile: device-specific profile to run devfreq. * @governor_name: name of the policy to choose frequency. - * @data: private data for the governor. The devfreq framework does not - * touch this value. + * @data: devfreq driver pass to governors, governor should not change it. 
*/ struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, @@ -1011,8 +1010,7 @@ static void devm_devfreq_dev_release(struct device *dev, void *res) * @dev: the device to add devfreq feature. * @profile: device-specific profile to run devfreq. * @governor_name: name of the policy to choose frequency. - * @data: private data for the governor. The devfreq framework does not - * touch this value. + * @data: devfreq driver pass to governors, governor should not change it. * * This function manages automatically the memory of devfreq device using device * resource management and simplify the free operation for memory of devfreq diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c index ab9db7adb3ad..d69672ccacc4 100644 --- a/drivers/devfreq/governor_userspace.c +++ b/drivers/devfreq/governor_userspace.c @@ -21,7 +21,7 @@ struct userspace_data { static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq) { - struct userspace_data *data = df->data; + struct userspace_data *data = df->governor_data; if (data->valid) *freq = data->user_frequency; @@ -40,7 +40,7 @@ static ssize_t set_freq_store(struct device *dev, struct device_attribute *attr, int err = 0; mutex_lock(&devfreq->lock); - data = devfreq->data; + data = devfreq->governor_data; sscanf(buf, "%lu", &wanted); data->user_frequency = wanted; @@ -60,7 +60,7 @@ static ssize_t set_freq_show(struct device *dev, int err = 0; mutex_lock(&devfreq->lock); - data = devfreq->data; + data = devfreq->governor_data; if (data->valid) err = sprintf(buf, "%lu\n", data->user_frequency); @@ -91,7 +91,7 @@ static int userspace_init(struct devfreq *devfreq) goto out; } data->valid = false; - devfreq->data = data; + devfreq->governor_data = data; err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group); out: @@ -107,8 +107,8 @@ static void userspace_exit(struct devfreq *devfreq) if (devfreq->dev.kobj.sd) sysfs_remove_group(&devfreq->dev.kobj, 
&dev_attr_group); - kfree(devfreq->data); - devfreq->data = NULL; + kfree(devfreq->governor_data); + devfreq->governor_data = NULL; } static int devfreq_userspace_handler(struct devfreq *devfreq, diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 34aab4dd336c..4dc7cda4fd46 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -152,8 +152,8 @@ struct devfreq_stats { * @max_state: count of entry present in the frequency table. * @previous_freq: previously configured frequency value. * @last_status: devfreq user device info, performance statistics - * @data: Private data of the governor. The devfreq framework does not - * touch this. + * @data: devfreq driver pass to governors, governor should not change it. + * @governor_data: private data for governors, devfreq core doesn't touch it. * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs) * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs) * @scaling_min_freq: Limit minimum frequency requested by OPP interface @@ -193,7 +193,8 @@ struct devfreq { unsigned long previous_freq; struct devfreq_dev_status last_status; - void *data; /* private data for governors */ + void *data; + void *governor_data; struct dev_pm_qos_request user_min_freq_req; struct dev_pm_qos_request user_max_freq_req; From e7c0c943ed675b66d4bbb16c51c6a3bb58da047e Mon Sep 17 00:00:00 2001 From: Yongqiang Liu Date: Thu, 10 Nov 2022 14:23:07 +0000 Subject: [PATCH 034/207] cpufreq: Init completion before kobject_init_and_add() commit 5c51054896bcce1d33d39fead2af73fec24f40b6 upstream. In cpufreq_policy_alloc(), it will use an uninitialized completion in cpufreq_sysfs_release() when kobject_init_and_add() fails. And that will cause a crash such as the following page fault in complete: BUG: unable to handle page fault for address: fffffffffffffff8 [..] RIP: 0010:complete+0x98/0x1f0 [..] 
Call Trace: kobject_put+0x1be/0x4c0 cpufreq_online.cold+0xee/0x1fd cpufreq_add_dev+0x183/0x1e0 subsys_interface_register+0x3f5/0x4e0 cpufreq_register_driver+0x3b7/0x670 acpi_cpufreq_init+0x56c/0x1000 [acpi_cpufreq] do_one_initcall+0x13d/0x780 do_init_module+0x1c3/0x630 load_module+0x6e67/0x73b0 __do_sys_finit_module+0x181/0x240 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 4ebe36c94aed ("cpufreq: Fix kobject memleak") Signed-off-by: Yongqiang Liu Acked-by: Viresh Kumar Cc: 5.2+ # 5.2+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 69b3d61852ac..7e56a42750ea 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1207,6 +1207,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; + init_completion(&policy->kobj_unregister); ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, cpufreq_global_kobject, "policy%u", cpu); if (ret) { @@ -1245,7 +1246,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); init_waitqueue_head(&policy->transition_wait); - init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); policy->cpu = cpu; From 67ffc6dc3002572fa2bd982d95736b08b3ee18e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Nov 2022 03:17:17 +0000 Subject: [PATCH 035/207] ext2: unbugger ext2_empty_dir() commit 27e714c007e4ad01837bf0fac5c11913a38d7695 upstream. 
In 27cfa258951a "ext2: fix fs corruption when trying to remove a non-empty directory with IO error" a funny thing has happened: - page = ext2_get_page(inode, i, dir_has_error, &page_addr); + page = ext2_get_page(inode, i, 0, &page_addr); - if (IS_ERR(page)) { - dir_has_error = 1; - continue; - } + if (IS_ERR(page)) + goto not_empty; And at not_empty: we hit ext2_put_page(page, page_addr), which does put_page(page). Which, unless I'm very mistaken, should oops immediately when given ERR_PTR(-E...) as page. OK, shit happens, insufficiently tested patches included. But when commit in question describes the fault-injection test that exercised that particular failure exit... Ow. CC: stable@vger.kernel.org Fixes: 27cfa258951a ("ext2: fix fs corruption when trying to remove a non-empty directory with IO error") Signed-off-by: Al Viro Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 8f597753ac12..5202eddfc3c0 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -679,7 +679,7 @@ int ext2_empty_dir (struct inode * inode) page = ext2_get_page(inode, i, 0, &page_addr); if (IS_ERR(page)) - goto not_empty; + return 0; kaddr = page_addr; de = (ext2_dirent *)kaddr; From bff2698aab6f9b691e1e19adc4ca73e4149c8256 Mon Sep 17 00:00:00 2001 From: Smitha T Murthy Date: Wed, 7 Sep 2022 16:02:27 +0530 Subject: [PATCH 036/207] media: s5p-mfc: Fix to handle reference queue during finishing commit d8a46bc4e1e0446459daa77c4ce14218d32dacf9 upstream. On receiving last buffer driver puts MFC to MFCINST_FINISHING state which in turn skips transferring of frame from SRC to REF queue. This causes driver to stop MFC encoding and last frame is lost. This patch guarantees safe handling of frames during MFCINST_FINISHING and correct clearing of workbit to avoid early stopping of encoding. 
Fixes: af9357467810 ("[media] MFC: Add MFC 5.1 V4L2 driver") Cc: stable@vger.kernel.org Cc: linux-fsd@tesla.com Signed-off-by: Smitha T Murthy Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.c index b65e506665af..f62703cebb77 100644 --- a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.c +++ b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.c @@ -1218,6 +1218,7 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx) unsigned long mb_y_addr, mb_c_addr; int slice_type; unsigned int strm_size; + bool src_ready; slice_type = s5p_mfc_hw_call(dev->mfc_ops, get_enc_slice_type, dev); strm_size = s5p_mfc_hw_call(dev->mfc_ops, get_enc_strm_size, dev); @@ -1257,7 +1258,8 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx) } } } - if ((ctx->src_queue_cnt > 0) && (ctx->state == MFCINST_RUNNING)) { + if (ctx->src_queue_cnt > 0 && (ctx->state == MFCINST_RUNNING || + ctx->state == MFCINST_FINISHING)) { mb_entry = list_entry(ctx->src_queue.next, struct s5p_mfc_buf, list); if (mb_entry->flags & MFC_BUF_FLAG_USED) { @@ -1288,7 +1290,13 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx) vb2_set_plane_payload(&mb_entry->b->vb2_buf, 0, strm_size); vb2_buffer_done(&mb_entry->b->vb2_buf, VB2_BUF_STATE_DONE); } - if ((ctx->src_queue_cnt == 0) || (ctx->dst_queue_cnt == 0)) + + src_ready = true; + if (ctx->state == MFCINST_RUNNING && ctx->src_queue_cnt == 0) + src_ready = false; + if (ctx->state == MFCINST_FINISHING && ctx->ref_queue_cnt == 0) + src_ready = false; + if (!src_ready || ctx->dst_queue_cnt == 0) clear_work_bit(ctx); return 0; From bd1b72f0c39a0d791a087b4e643701a48328ba8e Mon Sep 17 00:00:00 2001 From: Smitha T Murthy Date: Wed, 7 Sep 2022 16:02:26 +0530 Subject: [PATCH 037/207] media: s5p-mfc: Clear 
workbit to handle error condition commit d3f3c2fe54e30b0636496d842ffbb5ad3a547f9b upstream. During error on CLOSE_INSTANCE command, ctx_work_bits was not getting cleared. During consequent mfc execution NULL pointer dereferencing of this context led to kernel panic. This patch fixes this issue by making sure to clear ctx_work_bits always. Fixes: 818cd91ab8c6 ("[media] s5p-mfc: Extract open/close MFC instance commands") Cc: stable@vger.kernel.org Cc: linux-fsd@tesla.com Signed-off-by: Smitha T Murthy Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/samsung/s5p-mfc/s5p_mfc_ctrl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_ctrl.c b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_ctrl.c index 72d70984e99a..6d3c92045c05 100644 --- a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_ctrl.c +++ b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_ctrl.c @@ -468,8 +468,10 @@ void s5p_mfc_close_mfc_inst(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx) s5p_mfc_hw_call(dev->mfc_ops, try_run, dev); /* Wait until instance is returned or timeout occurred */ if (s5p_mfc_wait_for_done_ctx(ctx, - S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET, 0)) + S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET, 0)){ + clear_work_bit_irqsave(ctx); mfc_err("Err returning instance\n"); + } /* Free resources */ s5p_mfc_hw_call(dev->mfc_ops, release_codec_buffers, ctx); From f845a9d33b4e25215b3e3c31751debf57d5daf89 Mon Sep 17 00:00:00 2001 From: Smitha T Murthy Date: Wed, 7 Sep 2022 16:02:25 +0530 Subject: [PATCH 038/207] media: s5p-mfc: Fix in register read and write for H264 commit 06710cd5d2436135046898d7e4b9408c8bb99446 upstream. Few of the H264 encoder registers written were not getting reflected since the read values were not stored and getting overwritten. 
Fixes: 6a9c6f681257 ("[media] s5p-mfc: Add variants to access mfc registers") Cc: stable@vger.kernel.org Cc: linux-fsd@tesla.com Signed-off-by: Smitha T Murthy Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- .../platform/samsung/s5p-mfc/s5p_mfc_opr_v6.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v6.c b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v6.c index 8227004f6746..c0df5ac9fcff 100644 --- a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v6.c +++ b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v6.c @@ -1060,7 +1060,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) } /* aspect ratio VUI */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x1 << 5); reg |= ((p_h264->vui_sar & 0x1) << 5); writel(reg, mfc_regs->e_h264_options); @@ -1083,7 +1083,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) /* intra picture period for H.264 open GOP */ /* control */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x1 << 4); reg |= ((p_h264->open_gop & 0x1) << 4); writel(reg, mfc_regs->e_h264_options); @@ -1097,23 +1097,23 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) } /* 'WEIGHTED_BI_PREDICTION' for B is disable */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x3 << 9); writel(reg, mfc_regs->e_h264_options); /* 'CONSTRAINED_INTRA_PRED_ENABLE' is disable */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x1 << 14); writel(reg, mfc_regs->e_h264_options); /* ASO */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x1 << 6); reg |= ((p_h264->aso & 0x1) << 6); writel(reg, mfc_regs->e_h264_options); /* hier qp enable */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x1 << 8); reg |= 
((p_h264->open_gop & 0x1) << 8); writel(reg, mfc_regs->e_h264_options); @@ -1134,7 +1134,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) writel(reg, mfc_regs->e_h264_num_t_layer); /* frame packing SEI generation */ - readl(mfc_regs->e_h264_options); + reg = readl(mfc_regs->e_h264_options); reg &= ~(0x1 << 25); reg |= ((p_h264->sei_frame_packing & 0x1) << 25); writel(reg, mfc_regs->e_h264_options); From 7ac7830af689a81d68d25f3009a0a13542b22a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 15 Dec 2022 00:02:53 +0100 Subject: [PATCH 039/207] bpf: Resolve fext program type when checking map compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c123c567fb138ebd187480b7fc0610fcb0851f5 ] The bpf_prog_map_compatible() check makes sure that BPF program types are not mixed inside BPF map types that can contain programs (tail call maps, cpumaps and devmaps). It does this by setting the fields of the map->owner struct to the values of the first program being checked against, and rejecting any subsequent programs if the values don't match. One of the values being set in the map owner struct is the program type, and since the code did not resolve the prog type for fext programs, the map owner type would be set to PROG_TYPE_EXT and subsequent loading of programs of the target type into the map would fail. This bug is seen in particular for XDP programs that are loaded as PROG_TYPE_EXT using libxdp; these cannot insert programs into devmaps and cpumaps because the check fails as described above. Fix the bug by resolving the fext program type to its target program type as elsewhere in the verifier. 
v3: - Add Yonghong's ACK Fixes: f45d5b6ce2e8 ("bpf: generalise tail call map compatibility check") Acked-by: Yonghong Song Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221214230254.790066-1-toke@redhat.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/core.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9e1e6965f407..0eb8f035b3d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -642,7 +642,7 @@ static inline u32 type_flag(u32 type) } /* only use after check_attach_btf_id() */ -static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_EXT ? prog->aux->dst_prog->type : prog->type; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 25a54e04560e..17ab3e15ac25 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2088,6 +2088,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp) { + enum bpf_prog_type prog_type = resolve_prog_type(fp); bool ret; if (fp->kprobe_override) @@ -2098,12 +2099,12 @@ bool bpf_prog_map_compatible(struct bpf_map *map, /* There's no owner yet where we could check for * compatibility. 
*/ - map->owner.type = fp->type; + map->owner.type = prog_type; map->owner.jited = fp->jited; map->owner.xdp_has_frags = fp->aux->xdp_has_frags; ret = true; } else { - ret = map->owner.type == fp->type && + ret = map->owner.type == prog_type && map->owner.jited == fp->jited && map->owner.xdp_has_frags == fp->aux->xdp_has_frags; } From ed32831a17e5c4ab9665bebb6b2c5ad19fb57e5e Mon Sep 17 00:00:00 2001 From: Philipp Jungkamp Date: Mon, 5 Dec 2022 17:37:13 +0100 Subject: [PATCH 040/207] ALSA: patch_realtek: Fix Dell Inspiron Plus 16 [ Upstream commit 2912cdda734d9136615ed05636d9fcbca2a7a3c5 ] The Dell Inspiron Plus 16, in both laptop and 2in1 form factor, has top speakers connected on NID 0x17, which the codec reports as unconnected. These speakers should be connected to the DAC on NID 0x03. Signed-off-by: Philipp Jungkamp Link: https://lore.kernel.org/r/20221205163713.7476-1-p.jungkamp@gmx.net Signed-off-by: Takashi Iwai Stable-dep-of: a4517c4f3423 ("ALSA: hda/realtek: Apply dual codec fixup for Dell Latitude laptops") Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f5f640851fdc..e443d88f627f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6903,6 +6903,34 @@ static void alc287_fixup_yoga9_14iap7_bass_spk_pin(struct hda_codec *codec, } } +static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170151 }, + { 0x17, 0x90170150 }, + { } + }; + static const hda_nid_t conn[] = { 0x02, 0x03 }; + static const hda_nid_t preferred_pairs[] = { + 0x14, 0x02, + 0x17, 0x03, + 0x21, 0x02, + 0 + }; + struct alc_spec *spec = codec->spec; + + alc_fixup_no_shutup(codec, fix, action); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + 
snd_hda_apply_pincfgs(codec, pincfgs); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + spec->gen.preferred_dacs = preferred_pairs; + break; + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -7146,6 +7174,7 @@ enum { ALC287_FIXUP_LEGION_16ITHG6, ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, + ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9095,6 +9124,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, }, + [ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc295_fixup_dell_inspiron_top_speakers, + .chained = true, + .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9195,6 +9230,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 3ff594fb490f8be4e894557723611b11010d7e55 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 26 Dec 2022 19:43:03 +0800 Subject: [PATCH 041/207] ALSA: hda/realtek: Apply dual codec fixup for Dell Latitude laptops [ Upstream commit a4517c4f3423c7c448f2c359218f97c1173523a1 ] The Dell Latitude 3340/3440/3540 laptops with Realtek ALC3204 have dual codecs and need the
ALC1220_FIXUP_GB_DUAL_CODECS to fix the conflicts of Master controls. The existing headset mic fixup for Dell is also required to enable the jack sense and the headset mic. Introduce a new fixup to fix the dual codec and headset mic issues for particular Dell laptops since other old Dell laptops with the same codec configuration are already well handled by the fixup in alc269_fallback_pin_fixup_tbl[]. Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20221226114303.4027500-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e443d88f627f..3794b522c222 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7175,6 +7175,7 @@ enum { ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS, + ALC236_FIXUP_DELL_DUAL_CODECS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9130,6 +9131,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, }, + [ALC236_FIXUP_DELL_DUAL_CODECS] = { + .type = HDA_FIXUP_PINS, + .v.func = alc1220_fixup_gb_dual_codecs, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9232,6 +9239,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", 
ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From f48a74ccb51990eb719fc467693be8d70fb85dc6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Nov 2022 13:43:22 +0100 Subject: [PATCH 042/207] platform/x86: thinkpad_acpi: Fix max_brightness of thinklight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit db5e2a4ca0a7a5fe54f410590292ea2e91de6798 ] Thinklight has only two values, on/off so it's reasonable for max_brightness to be 0 and 1 as if you write anything between 0 and 255 it will be 255 anyway so there's no point for it to be 255. This may look like it is a userspace API change, but writes with a value larger than the new max_brightness will still be accepted, these will be silently clamped to the new max_brightness by led_set_brightness_nosleep(). So no userspace API problems are expected.
Reported-by: Michał Szczepaniak Link: https://lore.kernel.org/platform-driver-x86/55400326-e64f-5444-94e5-22b8214d00b6@gmail.com/ Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 8476dfef4e62..a1d91736a03b 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -5572,6 +5572,7 @@ static enum led_brightness light_sysfs_get(struct led_classdev *led_cdev) static struct tpacpi_led_classdev tpacpi_led_thinklight = { .led_classdev = { .name = "tpacpi::thinklight", + .max_brightness = 1, .brightness_set_blocking = &light_sysfs_set, .brightness_get = &light_sysfs_get, } From 317499df9d702d8c4123661b044a79d1dcc92ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eray=20Or=C3=A7unus?= Date: Sat, 29 Oct 2022 15:03:06 +0300 Subject: [PATCH 043/207] platform/x86: ideapad-laptop: Revert "check for touchpad support in _CFG" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5831882880e9a1749553e78f9d8369fe33116aaf ] Last 8 bit of _CFG started being used in later IdeaPads, thus 30th bit doesn't always show whether device supports touchpad or touchpad switch. Remove checking bit 30 of _CFG, so older IdeaPads like S10-3 can switch touchpad again via touchpad attribute. This reverts commit b3ed1b7fe378 ("platform/x86: ideapad-laptop: check for touchpad support in _CFG"). 
Signed-off-by: Eray Orçunus Acked-by: Ike Panhc Link: https://lore.kernel.org/r/20221029120311.11152-2-erayorcunus@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 3ea8fc6a9ca3..7192e0d2a14f 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -46,11 +46,10 @@ static const char *const ideapad_wmi_fnesc_events[] = { #endif enum { - CFG_CAP_BT_BIT = 16, - CFG_CAP_3G_BIT = 17, - CFG_CAP_WIFI_BIT = 18, - CFG_CAP_CAM_BIT = 19, - CFG_CAP_TOUCHPAD_BIT = 30, + CFG_CAP_BT_BIT = 16, + CFG_CAP_3G_BIT = 17, + CFG_CAP_WIFI_BIT = 18, + CFG_CAP_CAM_BIT = 19, }; enum { @@ -386,8 +385,6 @@ static int debugfs_cfg_show(struct seq_file *s, void *data) seq_puts(s, " wifi"); if (test_bit(CFG_CAP_CAM_BIT, &priv->cfg)) seq_puts(s, " camera"); - if (test_bit(CFG_CAP_TOUCHPAD_BIT, &priv->cfg)) - seq_puts(s, " touchpad"); seq_puts(s, "\n"); seq_puts(s, "Graphics: "); @@ -680,8 +677,7 @@ static umode_t ideapad_is_visible(struct kobject *kobj, else if (attr == &dev_attr_fn_lock.attr) supported = priv->features.fn_lock; else if (attr == &dev_attr_touchpad.attr) - supported = priv->features.touchpad_ctrl_via_ec && - test_bit(CFG_CAP_TOUCHPAD_BIT, &priv->cfg); + supported = priv->features.touchpad_ctrl_via_ec; else if (attr == &dev_attr_usb_charging.attr) supported = priv->features.usb_charging; From 32755a5da122f53c3d82e1ef9585999f2e89f62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eray=20Or=C3=A7unus?= Date: Sat, 29 Oct 2022 15:03:09 +0300 Subject: [PATCH 044/207] platform/x86: ideapad-laptop: Add new _CFG bit numbers for future use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit be5dd7d8359de9fb22115a63f09981cdf689db4f ] Later IdeaPads report 
various things in last 8 bits of _CFG, at least 5 of them represent supported on-screen-displays. Add those bit numbers to the enum, and use CFG_OSD_ as prefix of their names. Also expose the values of these bits to debugfs, since they can be useful. Signed-off-by: Eray Orçunus Acked-by: Ike Panhc Link: https://lore.kernel.org/r/20221029120311.11152-5-erayorcunus@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 33 +++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 7192e0d2a14f..125b4534424f 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -46,10 +46,22 @@ static const char *const ideapad_wmi_fnesc_events[] = { #endif enum { - CFG_CAP_BT_BIT = 16, - CFG_CAP_3G_BIT = 17, - CFG_CAP_WIFI_BIT = 18, - CFG_CAP_CAM_BIT = 19, + CFG_CAP_BT_BIT = 16, + CFG_CAP_3G_BIT = 17, + CFG_CAP_WIFI_BIT = 18, + CFG_CAP_CAM_BIT = 19, + + /* + * These are OnScreenDisplay support bits that can be useful to determine + * whether a hotkey exists/should show OSD. But they aren't particularly + * meaningful since they were introduced later, i.e. 2010 IdeaPads + * don't have these, but they still have had OSD for hotkeys. 
+ */ + CFG_OSD_NUMLK_BIT = 27, + CFG_OSD_CAPSLK_BIT = 28, + CFG_OSD_MICMUTE_BIT = 29, + CFG_OSD_TOUCHPAD_BIT = 30, + CFG_OSD_CAM_BIT = 31, }; enum { @@ -387,6 +399,19 @@ static int debugfs_cfg_show(struct seq_file *s, void *data) seq_puts(s, " camera"); seq_puts(s, "\n"); + seq_puts(s, "OSD support:"); + if (test_bit(CFG_OSD_NUMLK_BIT, &priv->cfg)) + seq_puts(s, " num-lock"); + if (test_bit(CFG_OSD_CAPSLK_BIT, &priv->cfg)) + seq_puts(s, " caps-lock"); + if (test_bit(CFG_OSD_MICMUTE_BIT, &priv->cfg)) + seq_puts(s, " mic-mute"); + if (test_bit(CFG_OSD_TOUCHPAD_BIT, &priv->cfg)) + seq_puts(s, " touchpad"); + if (test_bit(CFG_OSD_CAM_BIT, &priv->cfg)) + seq_puts(s, " camera"); + seq_puts(s, "\n"); + seq_puts(s, "Graphics: "); switch (priv->cfg & 0x700) { case 0x100: From 33c25c3146837f2ac7e465fdfd5dedd85d202ca8 Mon Sep 17 00:00:00 2001 From: Philipp Jungkamp Date: Wed, 16 Nov 2022 12:06:47 +0100 Subject: [PATCH 045/207] platform/x86: ideapad-laptop: support for more special keys in WMI [ Upstream commit f32e02417614d3588a3954dab2a70320c43d1010 ] The event data of the WMI event 0xD0, which is assumed to be the fn_lock, is used to indicate several special keys on newer Yoga 7/9 laptops. The notify_id 0xD0 is non-unique in the DSDT of the Yoga 9 14IAP7, this causes wmi_get_event_data() to report wrong values. Port the ideapad-laptop WMI code to the wmi bus infrastructure which does not suffer from the shortcomings of wmi_get_event_data(). 
Signed-off-by: Philipp Jungkamp Link: https://lore.kernel.org/r/20221116110647.3438-1-p.jungkamp@gmx.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 261 ++++++++++++++++++++------ 1 file changed, 202 insertions(+), 59 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 125b4534424f..13e3ae731fd8 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -30,6 +30,7 @@ #include <linux/seq_file.h> #include <linux/sysfs.h> #include <linux/types.h> +#include <linux/wmi.h> #include <acpi/video.h> @@ -37,14 +38,6 @@ #define IDEAPAD_RFKILL_DEV_NUM 3 -#if IS_ENABLED(CONFIG_ACPI_WMI) -static const char *const ideapad_wmi_fnesc_events[] = { - "26CAB2E5-5CF1-46AE-AAC3-4A12B6BA50E6", /* Yoga 3 */ - "56322276-8493-4CE8-A783-98C991274F5E", /* Yoga 700 */ - "8FC0DE0C-B4E4-43FD-B0F3-8871711C1294", /* Legion 5 */ -}; -#endif - enum { CFG_CAP_BT_BIT = 16, CFG_CAP_3G_BIT = 17, @@ -141,7 +134,6 @@ struct ideapad_private { struct ideapad_dytc_priv *dytc; struct dentry *debug; unsigned long cfg; - const char *fnesc_guid; struct { bool conservation_mode : 1; bool dytc : 1; @@ -182,6 +174,42 @@ MODULE_PARM_DESC(set_fn_lock_led, "Enable driver based updates of the fn-lock LED on fn-lock changes.
" "If you need this please report this to: platform-driver-x86@vger.kernel.org"); +/* + * shared data + */ + +static struct ideapad_private *ideapad_shared; +static DEFINE_MUTEX(ideapad_shared_mutex); + +static int ideapad_shared_init(struct ideapad_private *priv) +{ + int ret; + + mutex_lock(&ideapad_shared_mutex); + + if (!ideapad_shared) { + ideapad_shared = priv; + ret = 0; + } else { + dev_warn(&priv->adev->dev, "found multiple platform devices\n"); + ret = -EINVAL; + } + + mutex_unlock(&ideapad_shared_mutex); + + return ret; +} + +static void ideapad_shared_exit(struct ideapad_private *priv) +{ + mutex_lock(&ideapad_shared_mutex); + + if (ideapad_shared == priv) + ideapad_shared = NULL; + + mutex_unlock(&ideapad_shared_mutex); +} + /* * ACPI Helpers */ @@ -1110,6 +1138,8 @@ static void ideapad_sysfs_exit(struct ideapad_private *priv) /* * input device */ +#define IDEAPAD_WMI_KEY 0x100 + static const struct key_entry ideapad_keymap[] = { { KE_KEY, 6, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 7, { KEY_CAMERA } }, @@ -1123,6 +1153,28 @@ static const struct key_entry ideapad_keymap[] = { { KE_KEY, 66, { KEY_TOUCHPAD_OFF } }, { KE_KEY, 67, { KEY_TOUCHPAD_ON } }, { KE_KEY, 128, { KEY_ESC } }, + + /* + * WMI keys + */ + + /* FnLock (handled by the firmware) */ + { KE_IGNORE, 0x02 | IDEAPAD_WMI_KEY }, + /* Esc (handled by the firmware) */ + { KE_IGNORE, 0x03 | IDEAPAD_WMI_KEY }, + /* Customizable Lenovo Hotkey ("star" with 'S' inside) */ + { KE_KEY, 0x01 | IDEAPAD_WMI_KEY, { KEY_FAVORITES } }, + /* Dark mode toggle */ + { KE_KEY, 0x13 | IDEAPAD_WMI_KEY, { KEY_PROG1 } }, + /* Sound profile switch */ + { KE_KEY, 0x12 | IDEAPAD_WMI_KEY, { KEY_PROG2 } }, + /* Lenovo Virtual Background application */ + { KE_KEY, 0x28 | IDEAPAD_WMI_KEY, { KEY_PROG3 } }, + /* Lenovo Support */ + { KE_KEY, 0x27 | IDEAPAD_WMI_KEY, { KEY_HELP } }, + /* Refresh Rate Toggle */ + { KE_KEY, 0x0a | IDEAPAD_WMI_KEY, { KEY_DISPLAYTOGGLE } }, + { KE_END }, }; @@ -1526,33 +1578,6 @@ static void 
ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) } } -#if IS_ENABLED(CONFIG_ACPI_WMI) -static void ideapad_wmi_notify(u32 value, void *context) -{ - struct ideapad_private *priv = context; - unsigned long result; - - switch (value) { - case 128: - ideapad_input_report(priv, value); - break; - case 208: - if (!priv->features.set_fn_lock_led) - break; - - if (!eval_hals(priv->adev->handle, &result)) { - bool state = test_bit(HALS_FNLOCK_STATE_BIT, &result); - - exec_sals(priv->adev->handle, state ? SALS_FNLOCK_ON : SALS_FNLOCK_OFF); - } - break; - default: - dev_info(&priv->platform_device->dev, - "Unknown WMI event: %u\n", value); - } -} -#endif - /* On some models we need to call exec_sals(SALS_FNLOCK_ON/OFF) to set the LED */ static const struct dmi_system_id set_fn_lock_led_list[] = { { @@ -1643,6 +1668,118 @@ static void ideapad_check_features(struct ideapad_private *priv) } } +#if IS_ENABLED(CONFIG_ACPI_WMI) +/* + * WMI driver + */ +enum ideapad_wmi_event_type { + IDEAPAD_WMI_EVENT_ESC, + IDEAPAD_WMI_EVENT_FN_KEYS, +}; + +struct ideapad_wmi_private { + enum ideapad_wmi_event_type event; +}; + +static int ideapad_wmi_probe(struct wmi_device *wdev, const void *context) +{ + struct ideapad_wmi_private *wpriv; + + wpriv = devm_kzalloc(&wdev->dev, sizeof(*wpriv), GFP_KERNEL); + if (!wpriv) + return -ENOMEM; + + *wpriv = *(const struct ideapad_wmi_private *)context; + + dev_set_drvdata(&wdev->dev, wpriv); + return 0; +} + +static void ideapad_wmi_notify(struct wmi_device *wdev, union acpi_object *data) +{ + struct ideapad_wmi_private *wpriv = dev_get_drvdata(&wdev->dev); + struct ideapad_private *priv; + unsigned long result; + + mutex_lock(&ideapad_shared_mutex); + + priv = ideapad_shared; + if (!priv) + goto unlock; + + switch (wpriv->event) { + case IDEAPAD_WMI_EVENT_ESC: + ideapad_input_report(priv, 128); + break; + case IDEAPAD_WMI_EVENT_FN_KEYS: + if (priv->features.set_fn_lock_led && + !eval_hals(priv->adev->handle, &result)) { + bool state = 
test_bit(HALS_FNLOCK_STATE_BIT, &result); + + exec_sals(priv->adev->handle, state ? SALS_FNLOCK_ON : SALS_FNLOCK_OFF); + } + + if (data->type != ACPI_TYPE_INTEGER) { + dev_warn(&wdev->dev, + "WMI event data is not an integer\n"); + break; + } + + dev_dbg(&wdev->dev, "WMI fn-key event: 0x%llx\n", + data->integer.value); + + ideapad_input_report(priv, + data->integer.value | IDEAPAD_WMI_KEY); + + break; + } +unlock: + mutex_unlock(&ideapad_shared_mutex); +} + +static const struct ideapad_wmi_private ideapad_wmi_context_esc = { + .event = IDEAPAD_WMI_EVENT_ESC +}; + +static const struct ideapad_wmi_private ideapad_wmi_context_fn_keys = { + .event = IDEAPAD_WMI_EVENT_FN_KEYS +}; + +static const struct wmi_device_id ideapad_wmi_ids[] = { + { "26CAB2E5-5CF1-46AE-AAC3-4A12B6BA50E6", &ideapad_wmi_context_esc }, /* Yoga 3 */ + { "56322276-8493-4CE8-A783-98C991274F5E", &ideapad_wmi_context_esc }, /* Yoga 700 */ + { "8FC0DE0C-B4E4-43FD-B0F3-8871711C1294", &ideapad_wmi_context_fn_keys }, /* Legion 5 */ + {}, +}; +MODULE_DEVICE_TABLE(wmi, ideapad_wmi_ids); + +static struct wmi_driver ideapad_wmi_driver = { + .driver = { + .name = "ideapad_wmi", + }, + .id_table = ideapad_wmi_ids, + .probe = ideapad_wmi_probe, + .notify = ideapad_wmi_notify, +}; + +static int ideapad_wmi_driver_register(void) +{ + return wmi_driver_register(&ideapad_wmi_driver); +} + +static void ideapad_wmi_driver_unregister(void) +{ + return wmi_driver_unregister(&ideapad_wmi_driver); +} + +#else +static inline int ideapad_wmi_driver_register(void) { return 0; } +static inline void ideapad_wmi_driver_unregister(void) { } +#endif + +/* + * ACPI driver + */ static int ideapad_acpi_add(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); @@ -1724,30 +1861,16 @@ static int ideapad_acpi_add(struct platform_device *pdev) goto notification_failed; } -#if IS_ENABLED(CONFIG_ACPI_WMI) - for (i = 0; i < ARRAY_SIZE(ideapad_wmi_fnesc_events); i++) { - status = 
wmi_install_notify_handler(ideapad_wmi_fnesc_events[i], - ideapad_wmi_notify, priv); - if (ACPI_SUCCESS(status)) { - priv->fnesc_guid = ideapad_wmi_fnesc_events[i]; - break; - } - } - - if (ACPI_FAILURE(status) && status != AE_NOT_EXIST) { - err = -EIO; - goto notification_failed_wmi; - } -#endif + err = ideapad_shared_init(priv); + if (err) + goto shared_init_failed; return 0; -#if IS_ENABLED(CONFIG_ACPI_WMI) -notification_failed_wmi: +shared_init_failed: acpi_remove_notify_handler(priv->adev->handle, ACPI_DEVICE_NOTIFY, ideapad_acpi_notify); -#endif notification_failed: ideapad_backlight_exit(priv); @@ -1773,10 +1896,7 @@ static int ideapad_acpi_remove(struct platform_device *pdev) struct ideapad_private *priv = dev_get_drvdata(&pdev->dev); int i; -#if IS_ENABLED(CONFIG_ACPI_WMI) - if (priv->fnesc_guid) - wmi_remove_notify_handler(priv->fnesc_guid); -#endif + ideapad_shared_exit(priv); acpi_remove_notify_handler(priv->adev->handle, ACPI_DEVICE_NOTIFY, @@ -1828,7 +1948,30 @@ static struct platform_driver ideapad_acpi_driver = { }, }; -module_platform_driver(ideapad_acpi_driver); +static int __init ideapad_laptop_init(void) +{ + int err; + + err = ideapad_wmi_driver_register(); + if (err) + return err; + + err = platform_driver_register(&ideapad_acpi_driver); + if (err) { + ideapad_wmi_driver_unregister(); + return err; + } + + return 0; +} +module_init(ideapad_laptop_init) + +static void __exit ideapad_laptop_exit(void) +{ + ideapad_wmi_driver_unregister(); + platform_driver_unregister(&ideapad_acpi_driver); +} +module_exit(ideapad_laptop_exit) MODULE_AUTHOR("David Woodhouse "); MODULE_DESCRIPTION("IdeaPad ACPI Extras"); From 923c1922bdbe44bdbebe57b5b976d40f68853653 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 14 Nov 2022 15:44:58 +0100 Subject: [PATCH 046/207] ACPI: video: Simplify __acpi_video_get_backlight_type() [ Upstream commit a5df42521f328b45c9d89c13740e747be08ac66e ] Simplify __acpi_video_get_backlight_type() removing a nested if which makes 
the flow harder to follow. This also results in having only 1 exit point with return acpi_backlight_native instead of 2. Note this drops the (video_caps & ACPI_VIDEO_BACKLIGHT) check from the if (acpi_osi_is_win8() && native_available) return native path. Windows 8's hardware certification requirements include that there must be ACPI video bus backlight control, so the ACPI_VIDEO_BACKLIGHT check is redundant. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/video_detect.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 13f10fbcd7f0..0c17ee93f861 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -734,6 +734,16 @@ static bool google_cros_ec_present(void) return acpi_dev_found("GOOG0004") || acpi_dev_found("GOOG000C"); } +/* + * Windows 8 and newer no longer use the ACPI video interface, so it often + * does not work. So on win8+ systems prefer native brightness control. + * Chromebooks should always prefer native backlight control. + */ +static bool prefer_native_over_acpi_video(void) +{ + return acpi_osi_is_win8() || google_cros_ec_present(); +} + /* * Determine which type of backlight interface to use on this system, * First check cmdline, then dmi quirks, then do autodetect. @@ -779,26 +789,14 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) if (apple_gmux_backlight_present()) return acpi_backlight_apple_gmux; - /* Chromebooks should always prefer native backlight control. */ - if (google_cros_ec_present() && native_available) - return acpi_backlight_native; + /* Use ACPI video if available, except when native should be preferred. */ + if ((video_caps & ACPI_VIDEO_BACKLIGHT) && + !(native_available && prefer_native_over_acpi_video())) + return acpi_backlight_video; - /* On systems with ACPI video use either native or ACPI video. 
*/ - if (video_caps & ACPI_VIDEO_BACKLIGHT) { - /* - * Windows 8 and newer no longer use the ACPI video interface, - * so it often does not work. If the ACPI tables are written - * for win8 and native brightness ctl is available, use that. - * - * The native check deliberately is inside the if acpi-video - * block on older devices without acpi-video support native - * is usually not the best choice. - */ - if (acpi_osi_is_win8() && native_available) - return acpi_backlight_native; - else - return acpi_backlight_video; - } + /* Use native if available */ + if (native_available && prefer_native_over_acpi_video()) + return acpi_backlight_native; /* No ACPI video (old hw), use vendor specific fw methods. */ return acpi_backlight_vendor; From 8af3d02aae3ecc23ee84d7078d1ebb3df7557dda Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 14 Nov 2022 15:44:59 +0100 Subject: [PATCH 047/207] ACPI: video: Prefer native over vendor [ Upstream commit fb1836c91317e0770950260dfa91eb9b2170cb27 ] When available prefer native backlight control over vendor backlight control. Testing has shown that there are quite a few laptop models which rely on native backlight control (they don't have ACPI video bus backlight control) and on which acpi_osi_is_win8() returns false. Currently __acpi_video_get_backlight_type() returns vendor on these laptops, leading to an empty /sys/class/backlight. As a workaround for this acpi_video_backlight_use_native() has been temporarily changed to always return true. This re-introduces the problem of having multiple backlight devices under /sys/class/backlight for a single panel. Change __acpi_video_get_backlight_type() to prefer native over vendor when available. So that it returns native on these models. And change acpi_video_backlight_use_native() back to only return true when __acpi_video_get_backlight_type() returns native. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/video_detect.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 0c17ee93f861..76b7e7f8894e 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -795,10 +795,10 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) return acpi_backlight_video; /* Use native if available */ - if (native_available && prefer_native_over_acpi_video()) + if (native_available) return acpi_backlight_native; - /* No ACPI video (old hw), use vendor specific fw methods. */ + /* No ACPI video/native (old hw), use vendor specific fw methods. */ return acpi_backlight_vendor; } @@ -810,18 +810,6 @@ EXPORT_SYMBOL(acpi_video_get_backlight_type); bool acpi_video_backlight_use_native(void) { - /* - * Call __acpi_video_get_backlight_type() to let it know that - * a native backlight is available. - */ - __acpi_video_get_backlight_type(true); - - /* - * For now just always return true. There is a whole bunch of laptop - * models where (video_caps & ACPI_VIDEO_BACKLIGHT) is false causing - * __acpi_video_get_backlight_type() to return vendor, while these - * models only have a native backlight control. - */ - return true; + return __acpi_video_get_backlight_type(true) == acpi_backlight_native; } EXPORT_SYMBOL(acpi_video_backlight_use_native); From c234ccc68afec76b83ee519c91713f27d7550d24 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 17 Nov 2022 12:02:39 +0100 Subject: [PATCH 048/207] platform/x86: ideapad-laptop: Refactor ideapad_sync_touchpad_state() [ Upstream commit 289a59895e7a380cdc7fe2780d3073f4b9237020 ] Add an error exit for read_ec_data() failing instead of putting the main body in an if (success) block. 
Signed-off-by: Hans de Goede Reviewed-by: Jiaxun Yang Tested-by: Jiaxun Yang Tested-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20221117110244.67811-2-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 13e3ae731fd8..dcb3a82024da 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1490,23 +1490,26 @@ static void ideapad_kbd_bl_exit(struct ideapad_private *priv) static void ideapad_sync_touchpad_state(struct ideapad_private *priv) { unsigned long value; + unsigned char param; + int ret; if (!priv->features.touchpad_ctrl_via_ec) return; /* Without reading from EC touchpad LED doesn't switch state */ - if (!read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value)) { - unsigned char param; - /* - * Some IdeaPads don't really turn off touchpad - they only - * switch the LED state. We (de)activate KBC AUX port to turn - * touchpad off and on. We send KEY_TOUCHPAD_OFF and - * KEY_TOUCHPAD_ON to not to get out of sync with LED - */ - i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); - ideapad_input_report(priv, value ? 67 : 66); - sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); - } + ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); + if (ret) + return; + + /* + * Some IdeaPads don't really turn off touchpad - they only + * switch the LED state. We (de)activate KBC AUX port to turn + * touchpad off and on. We send KEY_TOUCHPAD_OFF and + * KEY_TOUCHPAD_ON to not to get out of sync with LED + */ + i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); + ideapad_input_report(priv, value ?
67 : 66); + sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); } static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) From 2380e9493538cd75026718fa9262d64167924998 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 17 Nov 2022 12:02:40 +0100 Subject: [PATCH 049/207] platform/x86: ideapad-laptop: Do not send KEY_TOUCHPAD* events on probe / resume [ Upstream commit f4dd8c44bb831ff885680bc77111fa39c193a93f ] The sending of KEY_TOUCHPAD* events is causing spurious touchpad OSD showing on resume. Disable the sending of events on probe / resume to fix this. Signed-off-by: Hans de Goede Reviewed-by: Jiaxun Yang Tested-by: Jiaxun Yang Tested-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20221117110244.67811-3-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index dcb3a82024da..eb0b1ec32c13 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1487,7 +1487,7 @@ static void ideapad_kbd_bl_exit(struct ideapad_private *priv) /* * module init/exit */ -static void ideapad_sync_touchpad_state(struct ideapad_private *priv) +static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_events) { unsigned long value; unsigned char param; @@ -1508,8 +1508,11 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv) * KEY_TOUCHPAD_ON to not to get out of sync with LED */ i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); - ideapad_input_report(priv, value ? 67 : 66); - sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); + + if (send_events) { + ideapad_input_report(priv, value ?
67 : 66); + sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); + } } static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) @@ -1550,7 +1553,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) ideapad_sync_rfk_state(priv); break; case 5: - ideapad_sync_touchpad_state(priv); + ideapad_sync_touchpad_state(priv, true); break; case 4: ideapad_backlight_notify_brightness(priv); @@ -1840,7 +1843,7 @@ static int ideapad_acpi_add(struct platform_device *pdev) ideapad_register_rfkill(priv, i); ideapad_sync_rfk_state(priv); - ideapad_sync_touchpad_state(priv); + ideapad_sync_touchpad_state(priv, false); err = ideapad_dytc_profile_init(priv); if (err) { @@ -1925,7 +1928,7 @@ static int ideapad_acpi_resume(struct device *dev) struct ideapad_private *priv = dev_get_drvdata(dev); ideapad_sync_rfk_state(priv); - ideapad_sync_touchpad_state(priv); + ideapad_sync_touchpad_state(priv, false); if (priv->dytc) dytc_profile_refresh(priv); From f5820b413962d0a7edf4ba97f62430f3f9ee60ff Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 17 Nov 2022 12:02:41 +0100 Subject: [PATCH 050/207] platform/x86: ideapad-laptop: Only toggle ps2 aux port on/off on select models [ Upstream commit c69e7d843d2c34b80b8731a5dc57c34ea04a3edf ] Recently there have been multiple patches to disable the ideapad-laptop's touchpad control code, because it is causing issues on various laptops: Commit d69cd7eea93e ("platform/x86: ideapad-laptop: Disable touchpad_switch for ELAN0634") Commit a231224a601c ("platform/x86: ideapad-laptop: Disable touchpad_switch") The turning on/off of the ps2 aux port was added specifically for the IdeaPad Z570, where the EC does toggle the touchpad on/off LED and toggles the value returned by reading VPCCMD_R_TOUCHPAD, but it does not actually turn on/off the touchpad. The ideapad-laptop code really should not be messing with the i8042 controller on all devices just for this special case. 
Add a new ctrl_ps2_aux_port flag set based on a DMI based allow-list for devices which need this workaround, populating it with just the Ideapad Z570 for now. This also adds a module parameter so that this behavior can easily be enabled on other models which may need it. Signed-off-by: Hans de Goede Reviewed-by: Jiaxun Yang Tested-by: Jiaxun Yang Tested-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20221117110244.67811-4-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index eb0b1ec32c13..1d86fb988d56 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -143,6 +143,7 @@ struct ideapad_private { bool hw_rfkill_switch : 1; bool kbd_bl : 1; bool touchpad_ctrl_via_ec : 1; + bool ctrl_ps2_aux_port : 1; bool usb_charging : 1; } features; struct { @@ -174,6 +175,12 @@ MODULE_PARM_DESC(set_fn_lock_led, "Enable driver based updates of the fn-lock LED on fn-lock changes. " "If you need this please report this to: platform-driver-x86@vger.kernel.org"); +static bool ctrl_ps2_aux_port; +module_param(ctrl_ps2_aux_port, bool, 0444); +MODULE_PARM_DESC(ctrl_ps2_aux_port, + "Enable driver based PS/2 aux port en-/dis-abling on touchpad on/off toggle. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + /* * shared data */ @@ -1507,7 +1514,8 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ * touchpad off and on. We send KEY_TOUCHPAD_OFF and * KEY_TOUCHPAD_ON to not to get out of sync with LED */ - i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); + if (priv->features.ctrl_ps2_aux_port) + i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); if (send_events) { ideapad_input_report(priv, value ?
67 : 66); @@ -1615,6 +1623,23 @@ static const struct dmi_system_id hw_rfkill_list[] = { {} }; +/* + * On some models the EC toggles the touchpad muted LED on touchpad toggle + * hotkey presses, but the EC does not actually disable the touchpad itself. + * On these models the driver needs to explicitly enable/disable the i8042 + * (PS/2) aux port. + */ +static const struct dmi_system_id ctrl_ps2_aux_port_list[] = { + { + /* Lenovo Ideapad Z570 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Ideapad Z570"), + }, + }, + {} +}; + static const struct dmi_system_id no_touchpad_switch_list[] = { { .ident = "Lenovo Yoga 3 Pro 1370", @@ -1642,6 +1667,8 @@ static void ideapad_check_features(struct ideapad_private *priv) set_fn_lock_led || dmi_check_system(set_fn_lock_led_list); priv->features.hw_rfkill_switch = hw_rfkill_switch || dmi_check_system(hw_rfkill_list); + priv->features.ctrl_ps2_aux_port = + ctrl_ps2_aux_port || dmi_check_system(ctrl_ps2_aux_port_list); /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ if (acpi_dev_present("ELAN0634", NULL, -1)) From bda3399b5abbaec5a173da6194a5d0d2c84e2bf6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 17 Nov 2022 12:02:42 +0100 Subject: [PATCH 051/207] platform/x86: ideapad-laptop: Send KEY_TOUCHPAD_TOGGLE on some models [ Upstream commit 5829f8a897e4f030cd2d32a930eea8954ab5dcd3 ] On recent Ideapad models the EC does not control the touchpad at all, so instead of sending KEY_TOUCHPAD_ON/ _OFF on touchpad toggle hotkey events, ideapad-laptop should send KEY_TOUCHPAD_TOGGLE and let userspace handle the toggling. Check for this by checking if the value read from VPCCMD_R_TOUCHPAD actually changes when receiving a touchpad-toggle hotkey event; and if it does not change send KEY_TOUCHPAD_TOGGLE to userspace to let userspace enable/disable the touchpad in software. 
Note this also drops the priv->features.touchpad_ctrl_via_ec check from ideapad_sync_touchpad_state() so that KEY_TOUCHPAD_TOGGLE will be send on laptops where this is not set too. This can be safely dropped now because the i8042_command(I8042_CMD_AUX_ENABLE/_DISABLE) call is now guarded by its own feature flag. Signed-off-by: Hans de Goede Reviewed-by: Jiaxun Yang Tested-by: Jiaxun Yang Tested-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20221117110244.67811-5-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 1d86fb988d56..9b36cfddd36f 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -134,6 +134,7 @@ struct ideapad_private { struct ideapad_dytc_priv *dytc; struct dentry *debug; unsigned long cfg; + unsigned long r_touchpad_val; struct { bool conservation_mode : 1; bool dytc : 1; @@ -650,6 +651,8 @@ static ssize_t touchpad_show(struct device *dev, if (err) return err; + priv->r_touchpad_val = result; + return sysfs_emit(buf, "%d\n", !!result); } @@ -669,6 +672,8 @@ static ssize_t touchpad_store(struct device *dev, if (err) return err; + priv->r_touchpad_val = state; + return count; } @@ -1159,6 +1164,7 @@ static const struct key_entry ideapad_keymap[] = { { KE_KEY, 65, { KEY_PROG4 } }, { KE_KEY, 66, { KEY_TOUCHPAD_OFF } }, { KE_KEY, 67, { KEY_TOUCHPAD_ON } }, + { KE_KEY, 68, { KEY_TOUCHPAD_TOGGLE } }, { KE_KEY, 128, { KEY_ESC } }, /* @@ -1500,9 +1506,6 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ unsigned char param; int ret; - if (!priv->features.touchpad_ctrl_via_ec) - return; - /* Without reading from EC touchpad LED doesn't switch state */ ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); if (ret) @@ -1518,9 +1521,20 @@ static void 
ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); if (send_events) { - ideapad_input_report(priv, value ? 67 : 66); - sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); + /* + * On older models the EC controls the touchpad and toggles it + * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF. + * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE. + */ + if (value != priv->r_touchpad_val) { + ideapad_input_report(priv, value ? 67 : 66); + sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); + } else { + ideapad_input_report(priv, 68); + } } + + priv->r_touchpad_val = value; } static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) From 009ac0049dc3fd1610d90f5273cd948ef79ac641 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 17 Nov 2022 12:02:43 +0100 Subject: [PATCH 052/207] platform/x86: ideapad-laptop: Stop writing VPCCMD_W_TOUCHPAD at probe time [ Upstream commit a10ba160d427e78ffa2ab15a86cacaec291fa58a ] Commit d69cd7eea93e ("platform/x86: ideapad-laptop: Disable touchpad_switch for ELAN0634") from January 2021 added a flag hiding the touchpad sysfs-attr and disabling ideapad_sync_touchpad_state() because some devices "do not use EC to switch touchpad". At the same time this added a write(VPCCMD_W_TOUCHPAD, 1) call at probe time on these same devices. This seems to be copied from the rfkill code which does something similar when hw rfkill support is disabled. But for the rfkill code this is known to be necessary on some models, where as for the touchpad control no motivation is given for doing this and prior to this patch there were no reports of needing to do this. So this seems unnecessary; and it is best to avoid poking the hardware unnecessarily to avoid unwanted side effects, so remove this.
Signed-off-by: Hans de Goede Reviewed-by: Jiaxun Yang Tested-by: Jiaxun Yang Tested-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20221117110244.67811-6-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 9b36cfddd36f..fc3d47a75944 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1875,10 +1875,6 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (!priv->features.hw_rfkill_switch) write_ec_cmd(priv->adev->handle, VPCCMD_W_RF, 1); - /* The same for Touchpad */ - if (!priv->features.touchpad_ctrl_via_ec) - write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, 1); - for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg)) ideapad_register_rfkill(priv, i); From c455aa7cc9cf1205b4d614560cfd508797d8681d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 22 Nov 2022 09:00:14 +0200 Subject: [PATCH 053/207] platform/x86: intel-uncore-freq: add Emerald Rapids support [ Upstream commit 9c252ecf30360cb7b4dbcc275aebe5642174fd39 ] Make Intel uncore frequency driver support Emerald Rapids by adding its CPU model to the match table. Emerald Rapids uncore frequency control is the same as in Sapphire Rapids. Signed-off-by: Artem Bityutskiy Acked-by: Srinivas Pandruvada Acked-by: Hans de Goede Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 8f9c571d7257..00ac7e381441 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -203,6 +203,7 @@ static const struct x86_cpu_id intel_uncore_cpu_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_cpu_ids); From 838e48fa1b1b4dcdaaccb75fb2de034d2740ea62 Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Mon, 5 Dec 2022 14:57:13 +0000 Subject: [PATCH 054/207] ALSA: hda/cirrus: Add extra 10 ms delay to allow PLL settle and lock. [ Upstream commit 9fb9fa18fb50d1a33a1bd947681fce96fc2c8db6 ] New HW platforms with multiple CS42L42 parts, faster CPU and i2c require some extra delay to allow PLL to settle and lock. Adding extra 10ms delay.
Signed-off-by: Vitaly Rodionov Link: https://lore.kernel.org/r/20221205145713.23852-1-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_cs8409.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 754aa8ddd2e4..0ba1fbcbb21e 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -888,7 +888,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42) /* Initialize CS42L42 companion codec */ cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num); - usleep_range(20000, 25000); + usleep_range(30000, 35000); /* Clear interrupts, by reading interrupt status registers */ cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs)); From 5a2798eda1a8f1038de74013c3b4669d17123059 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 8 Dec 2022 12:02:24 +0100 Subject: [PATCH 055/207] platform/x86: x86-android-tablets: Add Medion Lifetab S10346 data [ Upstream commit 902ce18ab1f4444ff9d49865bea35a07adcc03fd ] The Medion Lifetab S10346 is a x86 ACPI tablet which ships with Android x86 as factory OS. Its DSDT contains a bunch of I2C devices which are not actually there, causing various resource conflicts. Enumeration of these is skipped through the acpi_quirk_skip_i2c_client_enumeration(). Add support for manually instantiating the I2C devices which are actually present on this tablet by adding the necessary device info to the x86-android-tablets module. 
Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221208110224.107354-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/x86-android-tablets.c | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 4acd6fa8d43b..f04e06eeb958 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -987,6 +987,88 @@ static void lenovo_yoga_tab2_830_1050_exit(void) } } +/* Medion Lifetab S10346 tablets have an Android factory img with everything hardcoded */ +static const char * const medion_lifetab_s10346_accel_mount_matrix[] = { + "0", "1", "0", + "1", "0", "0", + "0", "0", "1" +}; + +static const struct property_entry medion_lifetab_s10346_accel_props[] = { + PROPERTY_ENTRY_STRING_ARRAY("mount-matrix", medion_lifetab_s10346_accel_mount_matrix), + { } +}; + +static const struct software_node medion_lifetab_s10346_accel_node = { + .properties = medion_lifetab_s10346_accel_props, +}; + +/* Note the LCD panel is mounted upside down, this is correctly indicated in the VBT */ +static const struct property_entry medion_lifetab_s10346_touchscreen_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + { } +}; + +static const struct software_node medion_lifetab_s10346_touchscreen_node = { + .properties = medion_lifetab_s10346_touchscreen_props, +}; + +static const struct x86_i2c_client_info medion_lifetab_s10346_i2c_clients[] __initconst = { + { + /* kxtj21009 accel */ + .board_info = { + .type = "kxtj21009", + .addr = 0x0f, + .dev_name = "kxtj21009", + .swnode = &medion_lifetab_s10346_accel_node, + }, + .adapter_path = "\\_SB_.I2C3", + .irq_data = { + .type = X86_ACPI_IRQ_TYPE_GPIOINT, + .chip = "INT33FC:02", + .index = 23, + .trigger = ACPI_EDGE_SENSITIVE, + .polarity = ACPI_ACTIVE_HIGH, + }, + }, { + /* 
goodix touchscreen */ + .board_info = { + .type = "GDIX1001:00", + .addr = 0x14, + .dev_name = "goodix_ts", + .swnode = &medion_lifetab_s10346_touchscreen_node, + }, + .adapter_path = "\\_SB_.I2C4", + .irq_data = { + .type = X86_ACPI_IRQ_TYPE_APIC, + .index = 0x44, + .trigger = ACPI_EDGE_SENSITIVE, + .polarity = ACPI_ACTIVE_LOW, + }, + }, +}; + +static struct gpiod_lookup_table medion_lifetab_s10346_goodix_gpios = { + .dev_id = "i2c-goodix_ts", + .table = { + GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:02", 3, "irq", GPIO_ACTIVE_HIGH), + { } + }, +}; + +static struct gpiod_lookup_table * const medion_lifetab_s10346_gpios[] = { + &medion_lifetab_s10346_goodix_gpios, + NULL +}; + +static const struct x86_dev_info medion_lifetab_s10346_info __initconst = { + .i2c_client_info = medion_lifetab_s10346_i2c_clients, + .i2c_client_count = ARRAY_SIZE(medion_lifetab_s10346_i2c_clients), + .gpiod_lookup_tables = medion_lifetab_s10346_gpios, +}; + /* Nextbook Ares 8 tablets have an Android factory img with everything hardcoded */ static const char * const nextbook_ares8_accel_mount_matrix[] = { "0", "-1", "0", @@ -1245,6 +1327,16 @@ static const struct dmi_system_id x86_android_tablet_ids[] __initconst = { }, .driver_data = (void *)&lenovo_yoga_tab2_830_1050_info, }, + { + /* Medion Lifetab S10346 */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are much too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "10/22/2015"), + }, + .driver_data = (void *)&medion_lifetab_s10346_info, + }, { /* Nextbook Ares 8 */ .matches = { From c693355b9735df17e20fbe63355045e414e404bb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 27 Nov 2022 19:24:58 +0100 Subject: [PATCH 056/207] platform/x86: x86-android-tablets: Add Lenovo Yoga Tab 3 (YT3-X90F) charger + fuel-gauge data [ Upstream commit b6c14ff1deaafd30036ec36d5205acd5a578b1cd ] The Lenovo Yoga Tab 3 
(YT3-X90F) is an Intel Cherry Trail based tablet which ships with Android as Factory OS. Its DSDT contains a bunch of I2C devices which are not actually there, causing various resource conflicts. Use acpi_quirk_skip_i2c_client_enumeration() to not enumerate these. The YT3-X90F has quite a bit of exotic hardware, this adds initial support by manually instantiating the i2c-clients for the 2 charger + 2 fuel-gauge chips used for the 2 batteries. Support for other parts of the hw will be added by follow-up patches. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221127182458.104528-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/x86-android-tablets.c | 135 ++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index f04e06eeb958..bbfae1395e18 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -5,7 +5,7 @@ * devices typically have a bunch of things hardcoded, rather than specified * in their DSDT. * - * Copyright (C) 2021 Hans de Goede + * Copyright (C) 2021-2022 Hans de Goede */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -987,6 +987,130 @@ static void lenovo_yoga_tab2_830_1050_exit(void) } } +/* Lenovo Yoga Tab 3 Pro YT3-X90F */ + +/* + * There are 2 batteries, with 2 bq27500 fuel-gauges and 2 bq25892 chargers, + * "bq25890-charger-1" is instantiated from: drivers/i2c/busses/i2c-cht-wc.c. 
+ */ +static const char * const lenovo_yt3_bq25892_0_suppliers[] = { "cht_wcove_pwrsrc" }; +static const char * const bq25890_1_psy[] = { "bq25890-charger-1" }; + +static const struct property_entry fg_bq25890_1_supply_props[] = { + PROPERTY_ENTRY_STRING_ARRAY("supplied-from", bq25890_1_psy), + { } +}; + +static const struct software_node fg_bq25890_1_supply_node = { + .properties = fg_bq25890_1_supply_props, +}; + +/* bq25892 charger settings for the flat lipo battery behind the screen */ +static const struct property_entry lenovo_yt3_bq25892_0_props[] = { + PROPERTY_ENTRY_STRING_ARRAY("supplied-from", lenovo_yt3_bq25892_0_suppliers), + PROPERTY_ENTRY_STRING("linux,power-supply-name", "bq25892-second-chrg"), + PROPERTY_ENTRY_U32("linux,iinlim-percentage", 40), + PROPERTY_ENTRY_BOOL("linux,skip-reset"), + /* Values taken from Android Factory Image */ + PROPERTY_ENTRY_U32("ti,charge-current", 2048000), + PROPERTY_ENTRY_U32("ti,battery-regulation-voltage", 4352000), + PROPERTY_ENTRY_U32("ti,termination-current", 128000), + PROPERTY_ENTRY_U32("ti,precharge-current", 128000), + PROPERTY_ENTRY_U32("ti,minimum-sys-voltage", 3700000), + PROPERTY_ENTRY_U32("ti,boost-voltage", 4998000), + PROPERTY_ENTRY_U32("ti,boost-max-current", 500000), + PROPERTY_ENTRY_BOOL("ti,use-ilim-pin"), + { } +}; + +static const struct software_node lenovo_yt3_bq25892_0_node = { + .properties = lenovo_yt3_bq25892_0_props, +}; + +static const struct x86_i2c_client_info lenovo_yt3_i2c_clients[] __initconst = { + { + /* bq27500 fuel-gauge for the flat lipo battery behind the screen */ + .board_info = { + .type = "bq27500", + .addr = 0x55, + .dev_name = "bq27500_0", + .swnode = &fg_bq25890_supply_node, + }, + .adapter_path = "\\_SB_.PCI0.I2C1", + }, { + /* bq25892 charger for the flat lipo battery behind the screen */ + .board_info = { + .type = "bq25892", + .addr = 0x6b, + .dev_name = "bq25892_0", + .swnode = &lenovo_yt3_bq25892_0_node, + }, + .adapter_path = "\\_SB_.PCI0.I2C1", + .irq_data = { + 
.type = X86_ACPI_IRQ_TYPE_GPIOINT, + .chip = "INT33FF:01", + .index = 5, + .trigger = ACPI_EDGE_SENSITIVE, + .polarity = ACPI_ACTIVE_LOW, + }, + }, { + /* bq27500 fuel-gauge for the round li-ion cells in the hinge */ + .board_info = { + .type = "bq27500", + .addr = 0x55, + .dev_name = "bq27500_1", + .swnode = &fg_bq25890_1_supply_node, + }, + .adapter_path = "\\_SB_.PCI0.I2C2", + } +}; + +static int __init lenovo_yt3_init(void) +{ + struct gpio_desc *gpiod; + int ret; + + /* + * The "bq25892_0" charger IC has its /CE (Charge-Enable) and OTG pins + * connected to GPIOs, rather then having them hardwired to the correct + * values as is normally done. + * + * The bq25890_charger driver controls these through I2C, but this only + * works if not overridden by the pins. Set these pins here: + * 1. Set /CE to 0 to allow charging. + * 2. Set OTG to 0 disable V5 boost output since the 5V boost output of + * the main "bq25892_1" charger is used when necessary. + */ + + /* /CE pin */ + ret = x86_android_tablet_get_gpiod("INT33FF:02", 22, &gpiod); + if (ret < 0) + return ret; + + /* + * The gpio_desc returned by x86_android_tablet_get_gpiod() is a "raw" + * gpio_desc, that is there is no way to pass lookup-flags like + * GPIO_ACTIVE_LOW. Set the GPIO to 0 here to enable charging since + * the /CE pin is active-low, but not marked as such in the gpio_desc. 
+ */ + gpiod_set_value(gpiod, 0); + + /* OTG pin */ + ret = x86_android_tablet_get_gpiod("INT33FF:03", 19, &gpiod); + if (ret < 0) + return ret; + + gpiod_set_value(gpiod, 0); + + return 0; +} + +static const struct x86_dev_info lenovo_yt3_info __initconst = { + .i2c_client_info = lenovo_yt3_i2c_clients, + .i2c_client_count = ARRAY_SIZE(lenovo_yt3_i2c_clients), + .init = lenovo_yt3_init, +}; + /* Medion Lifetab S10346 tablets have an Android factory img with everything hardcoded */ static const char * const medion_lifetab_s10346_accel_mount_matrix[] = { "0", "1", "0", @@ -1327,6 +1451,15 @@ static const struct dmi_system_id x86_android_tablet_ids[] __initconst = { }, .driver_data = (void *)&lenovo_yoga_tab2_830_1050_info, }, + { + /* Lenovo Yoga Tab 3 Pro YT3-X90F */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), + }, + .driver_data = (void *)&lenovo_yt3_info, + }, { /* Medion Lifetab S10346 */ .matches = { From ca4c62ec488bc8e4b989a765221123b862c483d5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 27 Nov 2022 23:19:28 +0100 Subject: [PATCH 057/207] platform/x86: x86-android-tablets: Add Advantech MICA-071 extra button [ Upstream commit b03ae77e7e057f4b3b858f10c840557e71448a91 ] The Advantech MICA-071 is a standard Windows tablet, but it has an extra "quick launch" button which is not described in the ACPI tables in anyway. Use the x86-android-tablets infra to create a gpio-button device for this. 
Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221127221928.123660-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/x86-android-tablets.c | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index bbfae1395e18..123a4618db55 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -265,6 +265,56 @@ static struct gpiod_lookup_table int3496_gpo2_pin22_gpios = { }, }; +/* + * Advantech MICA-071 + * This is a standard Windows tablet, but it has an extra "quick launch" button + * which is not described in the ACPI tables in anyway. + * Use the x86-android-tablets infra to create a gpio-button device for this. + */ +static struct gpio_keys_button advantech_mica_071_button = { + .code = KEY_PROG1, + /* .gpio gets filled in by advantech_mica_071_init() */ + .active_low = true, + .desc = "prog1_key", + .type = EV_KEY, + .wakeup = false, + .debounce_interval = 50, +}; + +static const struct gpio_keys_platform_data advantech_mica_071_button_pdata __initconst = { + .buttons = &advantech_mica_071_button, + .nbuttons = 1, + .name = "prog1_key", +}; + +static const struct platform_device_info advantech_mica_071_pdevs[] __initconst = { + { + .name = "gpio-keys", + .id = PLATFORM_DEVID_AUTO, + .data = &advantech_mica_071_button_pdata, + .size_data = sizeof(advantech_mica_071_button_pdata), + }, +}; + +static int __init advantech_mica_071_init(void) +{ + struct gpio_desc *gpiod; + int ret; + + ret = x86_android_tablet_get_gpiod("INT33FC:00", 2, &gpiod); + if (ret < 0) + return ret; + advantech_mica_071_button.gpio = desc_to_gpio(gpiod); + + return 0; +} + +static const struct x86_dev_info advantech_mica_071_info __initconst = { + .pdev_info = advantech_mica_071_pdevs, + .pdev_count = ARRAY_SIZE(advantech_mica_071_pdevs), + .init = advantech_mica_071_init, +}; + /* Asus ME176C and TF103C 
tablets shared data */ static struct gpio_keys_button asus_me176c_tf103c_lid = { .code = SW_LID, @@ -1385,6 +1435,14 @@ static const struct x86_dev_info xiaomi_mipad2_info __initconst = { }; static const struct dmi_system_id x86_android_tablet_ids[] __initconst = { + { + /* Advantech MICA-071 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"), + }, + .driver_data = (void *)&advantech_mica_071_info, + }, { /* Asus MeMO Pad 7 ME176C */ .matches = { From 9e456d460f1b4a756cb9c5aebe7609f2439ccb7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 24 Nov 2022 18:49:32 +0100 Subject: [PATCH 058/207] HID: Ignore HP Envy x360 eu0009nv stylus battery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cec827d658dd5c287ea8925737d45f0a60e47422 ] Battery status is reported for the HP Envy x360 eu0009nv stylus even though it does not have battery. Prevent it from always reporting the battery as low (1%). 
Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/823 Reported-by: Ioannis Iliopoulos Tested-by: Ioannis Iliopoulos Signed-off-by: José Expósito Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e27fb27a36bf..82713ef3aaa6 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -412,6 +412,7 @@ #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 #define I2C_DEVICE_ID_HP_ENVY_X360_15 0x2d05 #define I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100 0x29CF +#define I2C_DEVICE_ID_HP_ENVY_X360_EU0009NV 0x2CF9 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817 #define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d728a94c642e..3ee5a9fea20e 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -380,6 +380,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_EU0009NV), + HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN), From bb641476f581d0523597a2f25737c6a8e4d1f5e6 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 15 Dec 2022 16:30:37 +0100 Subject: [PATCH 059/207] ALSA: usb-audio: Add new quirk FIXED_RATE for JBL Quantum810 Wireless [ Upstream commit fd28941cff1cd9d8ffa59fe11eb64148e09b6ed6 ] It seems that the firmware is broken and does not accept the UAC_EP_CS_ATTR_SAMPLE_RATE URB. There is only one rate (48000Hz) available in the descriptors for the output endpoint. 
Create a new quirk QUIRK_FLAG_FIXED_RATE to skip the rate setup when only one rate is available (fixed). BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216798 Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20221215153037.1163786-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/card.h | 1 + sound/usb/endpoint.c | 16 ++++++++++------ sound/usb/endpoint.h | 3 ++- sound/usb/implicit.c | 6 +++++- sound/usb/implicit.h | 2 +- sound/usb/pcm.c | 36 +++++++++++++++++++++++++++++++++--- sound/usb/pcm.h | 2 ++ sound/usb/quirks.c | 2 ++ sound/usb/usbaudio.h | 4 ++++ 9 files changed, 60 insertions(+), 12 deletions(-) diff --git a/sound/usb/card.h b/sound/usb/card.h index 40061550105a..6ec95b2edf86 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -131,6 +131,7 @@ struct snd_usb_endpoint { bool lowlatency_playback; /* low-latency playback mode */ bool need_setup; /* (re-)need for hw_params? */ bool need_prepare; /* (re-)need for prepare? */ + bool fixed_rate; /* skip rate setup */ /* for hw constraints */ const struct audioformat *cur_audiofmt; diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 4aaf0784940b..419302e2057e 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -769,7 +769,8 @@ struct snd_usb_endpoint * snd_usb_endpoint_open(struct snd_usb_audio *chip, const struct audioformat *fp, const struct snd_pcm_hw_params *params, - bool is_sync_ep) + bool is_sync_ep, + bool fixed_rate) { struct snd_usb_endpoint *ep; int ep_num = is_sync_ep ? 
fp->sync_ep : fp->endpoint; @@ -825,6 +826,7 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, ep->implicit_fb_sync = fp->implicit_fb; ep->need_setup = true; ep->need_prepare = true; + ep->fixed_rate = fixed_rate; usb_audio_dbg(chip, " channels=%d, rate=%d, format=%s, period_bytes=%d, periods=%d, implicit_fb=%d\n", ep->cur_channels, ep->cur_rate, @@ -1413,11 +1415,13 @@ static int init_sample_rate(struct snd_usb_audio *chip, if (clock && !clock->need_setup) return 0; - err = snd_usb_init_sample_rate(chip, ep->cur_audiofmt, rate); - if (err < 0) { - if (clock) - clock->rate = 0; /* reset rate */ - return err; + if (!ep->fixed_rate) { + err = snd_usb_init_sample_rate(chip, ep->cur_audiofmt, rate); + if (err < 0) { + if (clock) + clock->rate = 0; /* reset rate */ + return err; + } } if (clock) diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index e67ea28faa54..924f4351588c 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -14,7 +14,8 @@ struct snd_usb_endpoint * snd_usb_endpoint_open(struct snd_usb_audio *chip, const struct audioformat *fp, const struct snd_pcm_hw_params *params, - bool is_sync_ep); + bool is_sync_ep, + bool fixed_rate); void snd_usb_endpoint_close(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep); int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index f3e8484b3d9c..41ac7185b42b 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -15,6 +15,7 @@ #include "usbaudio.h" #include "card.h" #include "helper.h" +#include "pcm.h" #include "implicit.h" enum { @@ -455,7 +456,8 @@ const struct audioformat * snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, const struct audioformat *target, const struct snd_pcm_hw_params *params, - int stream) + int stream, + bool *fixed_rate) { struct snd_usb_substream *subs; const struct audioformat *fp, *sync_fmt = NULL; @@ -483,6 +485,8 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio 
*chip, } } + if (fixed_rate) + *fixed_rate = snd_usb_pcm_has_fixed_rate(subs); return sync_fmt; } diff --git a/sound/usb/implicit.h b/sound/usb/implicit.h index ccb415a0ea86..7f1577b6c4d3 100644 --- a/sound/usb/implicit.h +++ b/sound/usb/implicit.h @@ -9,6 +9,6 @@ const struct audioformat * snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, const struct audioformat *target, const struct snd_pcm_hw_params *params, - int stream); + int stream, bool *fixed_rate); #endif /* __USBAUDIO_IMPLICIT_H */ diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 9557bd4d1bbc..99a66d0ef5b2 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -157,6 +157,31 @@ find_substream_format(struct snd_usb_substream *subs, true, subs); } +bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs) +{ + const struct audioformat *fp; + struct snd_usb_audio *chip = subs->stream->chip; + int rate = -1; + + if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE)) + return false; + list_for_each_entry(fp, &subs->fmt_list, list) { + if (fp->rates & SNDRV_PCM_RATE_CONTINUOUS) + return false; + if (fp->nr_rates < 1) + continue; + if (fp->nr_rates > 1) + return false; + if (rate < 0) { + rate = fp->rate_table[0]; + continue; + } + if (rate != fp->rate_table[0]) + return false; + } + return true; +} + static int init_pitch_v1(struct snd_usb_audio *chip, int ep) { struct usb_device *dev = chip->dev; @@ -450,12 +475,14 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, struct snd_usb_audio *chip = subs->stream->chip; const struct audioformat *fmt; const struct audioformat *sync_fmt; + bool fixed_rate, sync_fixed_rate; int ret; ret = snd_media_start_pipeline(subs); if (ret) return ret; + fixed_rate = snd_usb_pcm_has_fixed_rate(subs); fmt = find_substream_format(subs, hw_params); if (!fmt) { usb_audio_dbg(chip, @@ -469,7 +496,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, if (fmt->implicit_fb) { sync_fmt = snd_usb_find_implicit_fb_sync_format(chip, fmt, 
hw_params, - !substream->stream); + !substream->stream, + &sync_fixed_rate); if (!sync_fmt) { usb_audio_dbg(chip, "cannot find sync format: ep=0x%x, iface=%d:%d, format=%s, rate=%d, channels=%d\n", @@ -482,6 +510,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, } } else { sync_fmt = fmt; + sync_fixed_rate = fixed_rate; } ret = snd_usb_lock_shutdown(chip); @@ -499,7 +528,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, close_endpoints(chip, subs); } - subs->data_endpoint = snd_usb_endpoint_open(chip, fmt, hw_params, false); + subs->data_endpoint = snd_usb_endpoint_open(chip, fmt, hw_params, false, fixed_rate); if (!subs->data_endpoint) { ret = -EINVAL; goto unlock; @@ -508,7 +537,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, if (fmt->sync_ep) { subs->sync_endpoint = snd_usb_endpoint_open(chip, sync_fmt, hw_params, - fmt == sync_fmt); + fmt == sync_fmt, + sync_fixed_rate); if (!subs->sync_endpoint) { ret = -EINVAL; goto unlock; diff --git a/sound/usb/pcm.h b/sound/usb/pcm.h index 493a4e34d78d..388fe2ba346d 100644 --- a/sound/usb/pcm.h +++ b/sound/usb/pcm.h @@ -6,6 +6,8 @@ void snd_usb_set_pcm_ops(struct snd_pcm *pcm, int stream); int snd_usb_pcm_suspend(struct snd_usb_stream *as); int snd_usb_pcm_resume(struct snd_usb_stream *as); +bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *as); + int snd_usb_init_pitch(struct snd_usb_audio *chip, const struct audioformat *fmt); void snd_usb_preallocate_buffer(struct snd_usb_substream *subs); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 58b37bfc885c..3d13fdf7590c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2152,6 +2152,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ QUIRK_FLAG_IFACE_SKIP_CLOSE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ + QUIRK_FLAG_FIXED_RATE), /* Vendor matches */ 
VENDOR_FLG(0x045e, /* MS Lifecam */ diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 2aba508a4831..f5a8dca66457 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -175,6 +175,9 @@ extern bool snd_usb_skip_validation; * QUIRK_FLAG_FORCE_IFACE_RESET * Force an interface reset whenever stopping & restarting a stream * (e.g. after xrun) + * QUIRK_FLAG_FIXED_RATE + * Do not set PCM rate (frequency) when only one rate is available + * for the given endpoint. */ #define QUIRK_FLAG_GET_SAMPLE_RATE (1U << 0) @@ -198,5 +201,6 @@ extern bool snd_usb_skip_validation; #define QUIRK_FLAG_SKIP_IMPLICIT_FB (1U << 18) #define QUIRK_FLAG_IFACE_SKIP_CLOSE (1U << 19) #define QUIRK_FLAG_FORCE_IFACE_RESET (1U << 20) +#define QUIRK_FLAG_FIXED_RATE (1U << 21) #endif /* __USBAUDIO_H */ From 9e72fec62f315117e195b38c5c89b1cd53e9f703 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 27 Oct 2022 16:45:11 -0400 Subject: [PATCH 060/207] fs: dlm: fix sock release if listen fails commit 08ae0547e75ec3d062b6b6b9cf4830c730df68df upstream. This patch fixes a double sock_release() call when the listen() is called for the dlm lowcomms listen socket. The caller of dlm_listen_for_all should never care about releasing the socket if dlm_listen_for_all() fails, it's done now only once if listen() fails. 
Cc: stable@vger.kernel.org Fixes: 2dc6b1158c28 ("fs: dlm: introduce generic listen") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lowcomms.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 59f64c596233..2cb9f3b49e05 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1820,7 +1820,7 @@ static int dlm_listen_for_all(void) result = sock->ops->listen(sock, 5); if (result < 0) { dlm_close_sock(&listen_con.sock); - goto out; + return result; } return 0; @@ -2023,7 +2023,6 @@ fail_listen: dlm_proto_ops = NULL; fail_proto_ops: dlm_allow_conn = 0; - dlm_close_sock(&listen_con.sock); work_stop(); fail_local: deinit_local(); From 5a52012ff8b0c14354085bd86759ed81c1873acf Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 27 Oct 2022 16:45:12 -0400 Subject: [PATCH 061/207] fs: dlm: retry accept() until -EAGAIN or error returns commit f0f4bb431bd543ed7bebbaea3ce326cfcd5388bc upstream. This patch fixes a race if we get a socket data ready event twice while the listen connection worker is queued. Currently it will be served only once but we need to do it (in this case twice) until we hit -EAGAIN which tells us there is no pending accept going on. This patch wraps the accept call in a do-while loop that runs until we receive a return value which is different than 0, as it was done before commit d11ccd451b65 ("fs: dlm: listen socket out of connection hash"). 
Cc: stable@vger.kernel.org Fixes: d11ccd451b65 ("fs: dlm: listen socket out of connection hash") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lowcomms.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 2cb9f3b49e05..871d4e9f49fb 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1543,7 +1543,11 @@ static void process_recv_sockets(struct work_struct *work) static void process_listen_recv_socket(struct work_struct *work) { - accept_from_sock(&listen_con); + int ret; + + do { + ret = accept_from_sock(&listen_con); + } while (!ret); } static void dlm_connect(struct connection *con) From 993aad51b6ce978f687c1f77906e13c9bef70cef Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 9 Dec 2022 16:28:07 -0800 Subject: [PATCH 062/207] mptcp: netlink: fix some error return code commit e0fe1123ab2b07d2cd5475660bd0b4e6993ffaa7 upstream. Fix to return negative error code -EINVAL from some error handling cases instead of 0, as done elsewhere in those functions. 
Fixes: 9ab4807c84a4 ("mptcp: netlink: Add MPTCP_PM_CMD_ANNOUNCE") Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Reviewed-by: Matthieu Baerts Signed-off-by: Wei Yongjun Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 9e82250cbb70..0430415357ba 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -156,6 +156,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + err = -EINVAL; goto announce_err; } @@ -282,6 +283,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) if (addr_l.id == 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id"); + err = -EINVAL; goto create_err; } @@ -395,11 +397,13 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); + err = -EINVAL; goto destroy_err; } if (!addr_l.port || !addr_r.port) { GENL_SET_ERR_MSG(info, "missing local or remote port"); + err = -EINVAL; goto destroy_err; } From 01a3015206857df81f2db16c59cef2de2257f266 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 9 Dec 2022 16:28:08 -0800 Subject: [PATCH 063/207] mptcp: remove MPTCP 'ifdef' in TCP SYN cookies commit 3fff88186f047627bb128d65155f42517f8e448f upstream. To ease the maintenance, it is often recommended to avoid having #ifdef preprocessor conditions. Here the section related to CONFIG_MPTCP was quite short but the next commit needs to add more code around. It is then cleaner to move specific MPTCP code to functions located in net/mptcp directory. 
Now that mptcp_subflow_request_sock_ops structure can be static, it can also be marked as "read only after init". Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/mptcp.h | 12 ++++++++++-- net/ipv4/syncookies.c | 7 +++---- net/mptcp/subflow.c | 12 +++++++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 412479ebf5ad..3c5c68618fcc 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -97,8 +97,6 @@ struct mptcp_out_options { }; #ifdef CONFIG_MPTCP -extern struct request_sock_ops mptcp_subflow_request_sock_ops; - void mptcp_init(void); static inline bool sk_is_mptcp(const struct sock *sk) @@ -188,6 +186,9 @@ void mptcp_seq_show(struct seq_file *seq); int mptcp_subflow_init_cookie_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb); +struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener); __be32 mptcp_get_reset_option(const struct sk_buff *skb); @@ -274,6 +275,13 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req, return 0; /* TCP fallback */ } +static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener) +{ + return NULL; +} + static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); } #endif /* CONFIG_MPTCP */ diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 942d2dfa1115..26fb97d1d4d9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -288,12 +288,11 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct tcp_request_sock *treq; struct request_sock *req; -#ifdef CONFIG_MPTCP if (sk_is_mptcp(sk)) - ops = 
&mptcp_subflow_request_sock_ops; -#endif + req = mptcp_subflow_reqsk_alloc(ops, sk, false); + else + req = inet_reqsk_alloc(ops, sk, false); - req = inet_reqsk_alloc(ops, sk, false); if (!req) return NULL; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2159b5f9988f..3f670f2d5c5c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -529,7 +529,7 @@ static int subflow_v6_rebuild_header(struct sock *sk) } #endif -struct request_sock_ops mptcp_subflow_request_sock_ops; +static struct request_sock_ops mptcp_subflow_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init; static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -582,6 +582,16 @@ drop: } #endif +struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener) +{ + ops = &mptcp_subflow_request_sock_ops; + + return inet_reqsk_alloc(ops, sk_listener, attach_listener); +} +EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc); + /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, const struct mptcp_options_received *mp_opt) From d3901b9004a02d66ba4e1477bf16d65fe708a007 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 9 Dec 2022 16:28:09 -0800 Subject: [PATCH 064/207] mptcp: dedicated request sock for subflow in v6 commit 34b21d1ddc8ace77a8fa35c1b1e06377209e0dae upstream. tcp_request_sock_ops structure is specific to IPv4. It should then not be used with MPTCP subflows on top of IPv6. For example, it contains the 'family' field, initialised to AF_INET. This 'family' field is used by TCP FastOpen code to generate the cookie but also by TCP Metrics, SELinux and SYN Cookies. Using the wrong family will not lead to crashes but displaying/using/checking wrong things. Note that 'send_reset' callback from request_sock_ops structure is used in some error paths. 
It is then also important to use the correct one for IPv4 or IPv6. The slab name can also be different in IPv4 and IPv6, it will be used when printing some log messages. The slab pointer will anyway be the same because the object size is the same for both v4 and v6. A BUILD_BUG_ON() has also been added to make sure this size is the same. Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections") Reviewed-by: Mat Martineau Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/subflow.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3f670f2d5c5c..30524dd7d0ec 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -529,7 +529,7 @@ static int subflow_v6_rebuild_header(struct sock *sk) } #endif -static struct request_sock_ops mptcp_subflow_request_sock_ops __ro_after_init; +static struct request_sock_ops mptcp_subflow_v4_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init; static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -542,7 +542,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - return tcp_conn_request(&mptcp_subflow_request_sock_ops, + return tcp_conn_request(&mptcp_subflow_v4_request_sock_ops, &subflow_request_sock_ipv4_ops, sk, skb); drop: @@ -551,6 +551,7 @@ drop: } #if IS_ENABLED(CONFIG_MPTCP_IPV6) +static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; @@ -573,7 
+574,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) return 0; } - return tcp_conn_request(&mptcp_subflow_request_sock_ops, + return tcp_conn_request(&mptcp_subflow_v6_request_sock_ops, &subflow_request_sock_ipv6_ops, sk, skb); drop: @@ -586,7 +587,12 @@ struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *op struct sock *sk_listener, bool attach_listener) { - ops = &mptcp_subflow_request_sock_ops; + if (ops->family == AF_INET) + ops = &mptcp_subflow_v4_request_sock_ops; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (ops->family == AF_INET6) + ops = &mptcp_subflow_v6_request_sock_ops; +#endif return inet_reqsk_alloc(ops, sk_listener, attach_listener); } @@ -1914,7 +1920,6 @@ static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { static int subflow_ops_init(struct request_sock_ops *subflow_ops) { subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock); - subflow_ops->slab_name = "request_sock_subflow"; subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name, subflow_ops->obj_size, 0, @@ -1931,9 +1936,10 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops) void __init mptcp_subflow_init(void) { - mptcp_subflow_request_sock_ops = tcp_request_sock_ops; - if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0) - panic("MPTCP: failed to init subflow request sock ops\n"); + mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops; + mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4"; + if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0) + panic("MPTCP: failed to init subflow v4 request sock ops\n"); subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; @@ -1948,6 +1954,18 @@ void __init mptcp_subflow_init(void) tcp_prot_override.release_cb = tcp_release_cb_override; #if IS_ENABLED(CONFIG_MPTCP_IPV6) + /* In struct mptcp_subflow_request_sock, we assume the TCP request sock + * 
structures for v4 and v6 have the same size. It should not changed in + * the future but better to make sure to be warned if it is no longer + * the case. + */ + BUILD_BUG_ON(sizeof(struct tcp_request_sock) != sizeof(struct tcp6_request_sock)); + + mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops; + mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6"; + if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0) + panic("MPTCP: failed to init subflow v6 request sock ops\n"); + subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; From 1922ea6b0ae2ea0c9a09be0eafafe1cd1069d259 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 9 Dec 2022 16:28:10 -0800 Subject: [PATCH 065/207] mptcp: use proper req destructor for IPv6 commit d3295fee3c756ece33ac0d935e172e68c0a4161b upstream. Before, only the destructor from TCP request sock in IPv4 was called even if the subflow was IPv6. It is important to use the right destructor to avoid memory leaks with some advanced IPv6 features, e.g. when the request socks contain specific IPv6 options. 
Fixes: 79c0949e9a09 ("mptcp: Add key generation and token tree") Reviewed-by: Mat Martineau Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/subflow.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 30524dd7d0ec..613f515fedf0 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -45,7 +45,6 @@ static void subflow_req_destructor(struct request_sock *req) sock_put((struct sock *)subflow_req->msk); mptcp_token_destroy_request(req); - tcp_request_sock_ops.destructor(req); } static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, @@ -550,6 +549,12 @@ drop: return 0; } +static void subflow_v4_req_destructor(struct request_sock *req) +{ + subflow_req_destructor(req); + tcp_request_sock_ops.destructor(req); +} + #if IS_ENABLED(CONFIG_MPTCP_IPV6) static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init; static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; @@ -581,6 +586,12 @@ drop: tcp_listendrop(sk); return 0; /* don't send reset */ } + +static void subflow_v6_req_destructor(struct request_sock *req) +{ + subflow_req_destructor(req); + tcp6_request_sock_ops.destructor(req); +} #endif struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, @@ -1929,8 +1940,6 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops) if (!subflow_ops->slab) return -ENOMEM; - subflow_ops->destructor = subflow_req_destructor; - return 0; } @@ -1938,6 +1947,8 @@ void __init mptcp_subflow_init(void) { mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops; mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4"; + mptcp_subflow_v4_request_sock_ops.destructor = subflow_v4_req_destructor; + if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0) 
panic("MPTCP: failed to init subflow v4 request sock ops\n"); @@ -1963,6 +1974,8 @@ void __init mptcp_subflow_init(void) mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops; mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6"; + mptcp_subflow_v6_request_sock_ops.destructor = subflow_v6_req_destructor; + if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0) panic("MPTCP: failed to init subflow v6 request sock ops\n"); From f74b7c5a85e22cd9091845e0d62a1dd89d0f855f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 30 Nov 2022 13:26:32 -0500 Subject: [PATCH 066/207] dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort commit 352b837a5541690d4f843819028cf2b8be83d424 upstream. Same ABBA deadlock pattern fixed in commit 4b60f452ec51 ("dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata") to DM-cache's metadata. Reported-by: Zhihao Cheng Cc: stable@vger.kernel.org Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 54 +++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index ab13b7380265..83a5975bcc72 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, return r; } -static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) +static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd, + bool destroy_bm) { dm_sm_destroy(cmd->metadata_sm); dm_tm_destroy(cmd->tm); - dm_block_manager_destroy(cmd->bm); + if (destroy_bm) + dm_block_manager_destroy(cmd->bm); } typedef unsigned long (*flags_mutator)(unsigned long); @@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, cmd2 = 
lookup(bdev); if (cmd2) { mutex_unlock(&table_lock); - __destroy_persistent_data_objects(cmd); + __destroy_persistent_data_objects(cmd, true); kfree(cmd); return cmd2; } @@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) mutex_unlock(&table_lock); if (!cmd->fail_io) - __destroy_persistent_data_objects(cmd); + __destroy_persistent_data_objects(cmd, true); kfree(cmd); } } @@ -1807,14 +1809,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result) int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) { - int r; + int r = -EINVAL; + struct dm_block_manager *old_bm = NULL, *new_bm = NULL; + + /* fail_io is double-checked with cmd->root_lock held below */ + if (unlikely(cmd->fail_io)) + return r; + + /* + * Replacement block manager (new_bm) is created and old_bm destroyed outside of + * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of + * shrinker associated with the block manager's bufio client vs cmd root_lock). 
+ * - must take shrinker_rwsem without holding cmd->root_lock + */ + new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, + CACHE_MAX_CONCURRENT_LOCKS); WRITE_LOCK(cmd); - __destroy_persistent_data_objects(cmd); - r = __create_persistent_data_objects(cmd, false); + if (cmd->fail_io) { + WRITE_UNLOCK(cmd); + goto out; + } + + __destroy_persistent_data_objects(cmd, false); + old_bm = cmd->bm; + if (IS_ERR(new_bm)) { + DMERR("could not create block manager during abort"); + cmd->bm = NULL; + r = PTR_ERR(new_bm); + goto out_unlock; + } + + cmd->bm = new_bm; + r = __open_or_format_metadata(cmd, false); + if (r) { + cmd->bm = NULL; + goto out_unlock; + } + new_bm = NULL; +out_unlock: if (r) cmd->fail_io = true; WRITE_UNLOCK(cmd); + dm_block_manager_destroy(old_bm); +out: + if (new_bm && !IS_ERR(new_bm)) + dm_block_manager_destroy(new_bm); return r; } From cdf7a39bcc427febbfe3c3b9fe829825ead96c27 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Wed, 30 Nov 2022 21:31:34 +0800 Subject: [PATCH 067/207] dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8111964f1b8524c4bb56b02cd9c7a37725ea21fd upstream. 
Following concurrent processes: P1(drop cache) P2(kworker) drop_caches_sysctl_handler drop_slab shrink_slab down_read(&shrinker_rwsem) - LOCK A do_shrink_slab super_cache_scan prune_icache_sb dispose_list evict ext4_evict_inode ext4_clear_inode ext4_discard_preallocations ext4_mb_load_buddy_gfp ext4_mb_init_cache ext4_read_block_bitmap_nowait ext4_read_bh_nowait submit_bh dm_submit_bio do_worker process_deferred_bios commit metadata_operation_failed dm_pool_abort_metadata down_write(&pmd->root_lock) - LOCK B __destroy_persistent_data_objects dm_block_manager_destroy dm_bufio_client_destroy unregister_shrinker down_write(&shrinker_rwsem) thin_map | dm_thin_find_block ↓ down_read(&pmd->root_lock) --> ABBA deadlock , which triggers hung task: [ 76.974820] INFO: task kworker/u4:3:63 blocked for more than 15 seconds. [ 76.976019] Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910 [ 76.978521] task:kworker/u4:3 state:D stack:0 pid:63 ppid:2 [ 76.978534] Workqueue: dm-thin do_worker [ 76.978552] Call Trace: [ 76.978564] __schedule+0x6ba/0x10f0 [ 76.978582] schedule+0x9d/0x1e0 [ 76.978588] rwsem_down_write_slowpath+0x587/0xdf0 [ 76.978600] down_write+0xec/0x110 [ 76.978607] unregister_shrinker+0x2c/0xf0 [ 76.978616] dm_bufio_client_destroy+0x116/0x3d0 [ 76.978625] dm_block_manager_destroy+0x19/0x40 [ 76.978629] __destroy_persistent_data_objects+0x5e/0x70 [ 76.978636] dm_pool_abort_metadata+0x8e/0x100 [ 76.978643] metadata_operation_failed+0x86/0x110 [ 76.978649] commit+0x6a/0x230 [ 76.978655] do_worker+0xc6e/0xd90 [ 76.978702] process_one_work+0x269/0x630 [ 76.978714] worker_thread+0x266/0x630 [ 76.978730] kthread+0x151/0x1b0 [ 76.978772] INFO: task test.sh:2646 blocked for more than 15 seconds. 
[ 76.979756] Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910 [ 76.982111] task:test.sh state:D stack:0 pid:2646 ppid:2459 [ 76.982128] Call Trace: [ 76.982139] __schedule+0x6ba/0x10f0 [ 76.982155] schedule+0x9d/0x1e0 [ 76.982159] rwsem_down_read_slowpath+0x4f4/0x910 [ 76.982173] down_read+0x84/0x170 [ 76.982177] dm_thin_find_block+0x4c/0xd0 [ 76.982183] thin_map+0x201/0x3d0 [ 76.982188] __map_bio+0x5b/0x350 [ 76.982195] dm_submit_bio+0x2b6/0x930 [ 76.982202] __submit_bio+0x123/0x2d0 [ 76.982209] submit_bio_noacct_nocheck+0x101/0x3e0 [ 76.982222] submit_bio_noacct+0x389/0x770 [ 76.982227] submit_bio+0x50/0xc0 [ 76.982232] submit_bh_wbc+0x15e/0x230 [ 76.982238] submit_bh+0x14/0x20 [ 76.982241] ext4_read_bh_nowait+0xc5/0x130 [ 76.982247] ext4_read_block_bitmap_nowait+0x340/0xc60 [ 76.982254] ext4_mb_init_cache+0x1ce/0xdc0 [ 76.982259] ext4_mb_load_buddy_gfp+0x987/0xfa0 [ 76.982263] ext4_discard_preallocations+0x45d/0x830 [ 76.982274] ext4_clear_inode+0x48/0xf0 [ 76.982280] ext4_evict_inode+0xcf/0xc70 [ 76.982285] evict+0x119/0x2b0 [ 76.982290] dispose_list+0x43/0xa0 [ 76.982294] prune_icache_sb+0x64/0x90 [ 76.982298] super_cache_scan+0x155/0x210 [ 76.982303] do_shrink_slab+0x19e/0x4e0 [ 76.982310] shrink_slab+0x2bd/0x450 [ 76.982317] drop_slab+0xcc/0x1a0 [ 76.982323] drop_caches_sysctl_handler+0xb7/0xe0 [ 76.982327] proc_sys_call_handler+0x1bc/0x300 [ 76.982331] proc_sys_write+0x17/0x20 [ 76.982334] vfs_write+0x3d3/0x570 [ 76.982342] ksys_write+0x73/0x160 [ 76.982347] __x64_sys_write+0x1e/0x30 [ 76.982352] do_syscall_64+0x35/0x80 [ 76.982357] entry_SYSCALL_64_after_hwframe+0x63/0xcd Function metadata_operation_failed() is called when operations failed on dm pool metadata, dm pool will destroy and recreate metadata. So, shrinker will be unregistered and registered, which could down write shrinker_rwsem under pmd_write_lock. 
Fix it by allocating dm_block_manager before locking pmd->root_lock and destroying old dm_block_manager after unlocking pmd->root_lock, then old dm_block_manager is replaced with new dm_block_manager under pmd->root_lock. So, shrinker register/unregister could be done without holding pmd->root_lock. Fetch a reproducer in [Link]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216676 Cc: stable@vger.kernel.org #v5.2+ Fixes: e49e582965b3 ("dm thin: add read only and fail io modes") Signed-off-by: Zhihao Cheng Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 51 +++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index a27395c8621f..1a62226ac34e 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -776,13 +776,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f return r; } -static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd, + bool destroy_bm) { dm_sm_destroy(pmd->data_sm); dm_sm_destroy(pmd->metadata_sm); dm_tm_destroy(pmd->nb_tm); dm_tm_destroy(pmd->tm); - dm_block_manager_destroy(pmd->bm); + if (destroy_bm) + dm_block_manager_destroy(pmd->bm); } static int __begin_transaction(struct dm_pool_metadata *pmd) @@ -989,7 +991,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) } pmd_write_unlock(pmd); if (!pmd->fail_io) - __destroy_persistent_data_objects(pmd); + __destroy_persistent_data_objects(pmd, true); kfree(pmd); return 0; @@ -1860,19 +1862,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) { int r = -EINVAL; + struct dm_block_manager *old_bm = NULL, *new_bm = NULL; + + /* fail_io is double-checked with pmd->root_lock held below */ + if 
(unlikely(pmd->fail_io)) + return r; + + /* + * Replacement block manager (new_bm) is created and old_bm destroyed outside of + * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of + * shrinker associated with the block manager's bufio client vs pmd root_lock). + * - must take shrinker_rwsem without holding pmd->root_lock + */ + new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, + THIN_MAX_CONCURRENT_LOCKS); pmd_write_lock(pmd); - if (pmd->fail_io) + if (pmd->fail_io) { + pmd_write_unlock(pmd); goto out; + } __set_abort_with_changes_flags(pmd); - __destroy_persistent_data_objects(pmd); - r = __create_persistent_data_objects(pmd, false); + __destroy_persistent_data_objects(pmd, false); + old_bm = pmd->bm; + if (IS_ERR(new_bm)) { + DMERR("could not create block manager during abort"); + pmd->bm = NULL; + r = PTR_ERR(new_bm); + goto out_unlock; + } + + pmd->bm = new_bm; + r = __open_or_format_metadata(pmd, false); + if (r) { + pmd->bm = NULL; + goto out_unlock; + } + new_bm = NULL; +out_unlock: if (r) pmd->fail_io = true; - -out: pmd_write_unlock(pmd); + dm_block_manager_destroy(old_bm); +out: + if (new_bm && !IS_ERR(new_bm)) + dm_block_manager_destroy(new_bm); return r; } From b91f481300e3a10eaf66b94fc39b740928762aaf Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Thu, 8 Dec 2022 22:28:02 +0800 Subject: [PATCH 068/207] dm thin: Use last transaction's pmd->root when commit failed commit 7991dbff6849f67e823b7cc0c15e5a90b0549b9f upstream. 
Recently we found a soft lockup problem in the dm thin pool btree lookup code due to corrupted metadata: Kernel panic - not syncing: softlockup: hung tasks CPU: 7 PID: 2669225 Comm: kworker/u16:3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Workqueue: dm-thin do_worker [dm_thin_pool] Call Trace: dump_stack+0x9c/0xd3 panic+0x35d/0x6b9 watchdog_timer_fn.cold+0x16/0x25 __run_hrtimer+0xa2/0x2d0 RIP: 0010:__relink_lru+0x102/0x220 [dm_bufio] __bufio_new+0x11f/0x4f0 [dm_bufio] new_read+0xa3/0x1e0 [dm_bufio] dm_bm_read_lock+0x33/0xd0 [dm_persistent_data] ro_step+0x63/0x100 [dm_persistent_data] btree_lookup_raw.constprop.0+0x44/0x220 [dm_persistent_data] dm_btree_lookup+0x16f/0x210 [dm_persistent_data] dm_thin_find_block+0x12c/0x210 [dm_thin_pool] __process_bio_read_only+0xc5/0x400 [dm_thin_pool] process_thin_deferred_bios+0x1a4/0x4a0 [dm_thin_pool] process_one_work+0x3c5/0x730 The following process may generate a broken btree that mixes fresh and stale btree nodes, which could trap dm thin in an infinite loop while looking up a data block: Transaction 1: pmd->root = A, A->B->C // One path in btree pmd->root = X, X->Y->Z // Copy-up Transaction 2: X,Z is updated on disk, Y write failed. // Commit failed, dm thin becomes read-only. process_bio_read_only dm_thin_find_block __find_block dm_btree_lookup(pmd->root) The pmd->root points to a broken btree: Y may contain a stale node pointing to any block, for example X, which traps dm thin in an infinite loop while looking up Z. Fix this by setting pmd->root in __open_metadata(), so that dm thin will use the last transaction's pmd->root if the commit failed. A reproducer is available at [Link]. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216790 Cc: stable@vger.kernel.org Fixes: 991d9fa02da0 ("dm: add thin provisioning target") Signed-off-by: Zhihao Cheng Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 1a62226ac34e..6bcc4c4786d8 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -724,6 +724,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) goto bad_cleanup_data_sm; } + /* + * For pool metadata opening process, root setting is redundant + * because it will be set again in __begin_transaction(). But dm + * pool aborting process really needs to get last transaction's + * root to avoid accessing broken btree. + */ + pmd->root = le64_to_cpu(disk_super->data_mapping_root); + pmd->details_root = le64_to_cpu(disk_super->device_details_root); + __setup_btree_details(pmd); dm_bm_unlock(sblock); From 82976ba6deb1636a0d63f69da96f804c3cf54d46 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Wed, 30 Nov 2022 10:09:45 +0800 Subject: [PATCH 069/207] dm thin: resume even if in FAIL mode commit 19eb1650afeb1aa86151f61900e9e5f1de5d8d02 upstream. If a thin-pool sets fail_io while suspending, resume will fail with: device-mapper: resume ioctl on vg-thinpool failed: Invalid argument The thin-pool also can't be removed if an in-flight bio is in the deferred list. This can be easily reproduced using: echo "offline" > /sys/block/sda/device/state dd if=/dev/zero of=/dev/mapper/thin bs=4K count=1 dmsetup suspend /dev/mapper/pool mkfs.ext4 /dev/mapper/thin dmsetup resume /dev/mapper/pool The root cause is that maybe_resize_data_dev() checks fail_io and returns an error before dm_resume is called. Fix this by adding a FAIL mode check at the end of pool_preresume(). 
Cc: stable@vger.kernel.org Fixes: da105ed5fd7e ("dm thin metadata: introduce dm_pool_abort_metadata") Signed-off-by: Luo Meng Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e76c96c760a9..c3ba30b40321 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3540,20 +3540,28 @@ static int pool_preresume(struct dm_target *ti) */ r = bind_control_target(pool, ti); if (r) - return r; + goto out; r = maybe_resize_data_dev(ti, &need_commit1); if (r) - return r; + goto out; r = maybe_resize_metadata_dev(ti, &need_commit2); if (r) - return r; + goto out; if (need_commit1 || need_commit2) (void) commit(pool); +out: + /* + * When a thin-pool is PM_FAIL, it cannot be rebuilt if + * bio is in deferred list. Therefore need to return 0 + * to allow pool_resume() to flush IO. + */ + if (r && get_pool_mode(pool) == PM_FAIL) + r = 0; - return 0; + return r; } static void pool_suspend_active_thins(struct pool *pool) From d9971fa4d8bde63d49c743c1b32d12fbbd3a30bd Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 29 Nov 2022 10:48:47 +0800 Subject: [PATCH 070/207] dm thin: Fix UAF in run_timer_softirq() commit 88430ebcbc0ec637b710b947738839848c20feff upstream. 
When dm_resume() and dm_destroy() are concurrent, it will lead to UAF, as follows: BUG: KASAN: use-after-free in __run_timers+0x173/0x710 Write of size 8 at addr ffff88816d9490f0 by task swapper/0/0 Call Trace: dump_stack_lvl+0x73/0x9f print_report.cold+0x132/0xaa2 _raw_spin_lock_irqsave+0xcd/0x160 __run_timers+0x173/0x710 kasan_report+0xad/0x110 __run_timers+0x173/0x710 __asan_store8+0x9c/0x140 __run_timers+0x173/0x710 call_timer_fn+0x310/0x310 pvclock_clocksource_read+0xfa/0x250 kvm_clock_read+0x2c/0x70 kvm_clock_get_cycles+0xd/0x20 ktime_get+0x5c/0x110 lapic_next_event+0x38/0x50 clockevents_program_event+0xf1/0x1e0 run_timer_softirq+0x49/0x90 __do_softirq+0x16e/0x62c __irq_exit_rcu+0x1fa/0x270 irq_exit_rcu+0x12/0x20 sysvec_apic_timer_interrupt+0x8e/0xc0 One of the concurrency UAF can be shown as below: use free do_resume | __find_device_hash_cell | dm_get | atomic_inc(&md->holders) | | dm_destroy | __dm_destroy | if (!dm_suspended_md(md)) | atomic_read(&md->holders) | msleep(1) dm_resume | __dm_resume | dm_table_resume_targets | pool_resume | do_waker #add delay work | dm_put | atomic_dec(&md->holders) | | dm_table_destroy | pool_dtr | __pool_dec | __pool_destroy | destroy_workqueue | kfree(pool) # free pool time out __do_softirq run_timer_softirq # pool has already been freed This can be easily reproduced using: 1. create thin-pool 2. dmsetup suspend pool 3. dmsetup resume pool 4. dmsetup remove_all # Concurrent with 3 The root cause of this UAF bug is that dm_resume() adds timer after dm_destroy() skips cancelling the timer because of suspend status. After timeout, it will call run_timer_softirq(), however pool has already been freed. The concurrency UAF bug will happen. Therefore, cancelling timer again in __pool_destroy(). 
Cc: stable@vger.kernel.org Fixes: 991d9fa02da0d ("dm: add thin provisioning target") Signed-off-by: Luo Meng Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c3ba30b40321..196f82559ad6 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool) dm_bio_prison_destroy(pool->prison); dm_kcopyd_client_destroy(pool->copier); + cancel_delayed_work_sync(&pool->waker); + cancel_delayed_work_sync(&pool->no_space_timeout); if (pool->wq) destroy_workqueue(pool->wq); From b6c93cd61afab061d80cc842333abca97b289774 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 29 Nov 2022 10:48:50 +0800 Subject: [PATCH 071/207] dm integrity: Fix UAF in dm_integrity_dtr() commit f50cb2cbabd6c4a60add93d72451728f86e4791c upstream. Dm_integrity also has the same UAF problem when dm_resume() and dm_destroy() are concurrent. Therefore, cancelling timer again in dm_integrity_dtr(). 
Cc: stable@vger.kernel.org Fixes: 7eada909bfd7a ("dm: add integrity target") Signed-off-by: Luo Meng Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e97e9f97456d..1388ee35571e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4558,6 +4558,8 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); + if (ic->mode == 'B') + cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) From 9e113cd4f61f3b0000843b2d0a90ce8b40a1fcff Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 29 Nov 2022 10:48:48 +0800 Subject: [PATCH 072/207] dm clone: Fix UAF in clone_dtr() commit e4b5957c6f749a501c464f92792f1c8e26b61a94 upstream. Dm_clone also has the same UAF problem when dm_resume() and dm_destroy() are concurrent. Therefore, cancelling timer again in clone_dtr(). 
Cc: stable@vger.kernel.org Fixes: 7431b7835f554 ("dm: add clone target") Signed-off-by: Luo Meng Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-clone-target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 2f1cc66d2641..29e0b85eeaf0 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -1958,6 +1958,7 @@ static void clone_dtr(struct dm_target *ti) mempool_exit(&clone->hydration_pool); dm_kcopyd_client_destroy(clone->kcopyd_client); + cancel_delayed_work_sync(&clone->waker); destroy_workqueue(clone->wq); hash_table_exit(clone); dm_clone_metadata_close(clone->cmd); From 6a3e412c2ab131c54945327a7676b006f000a209 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 29 Nov 2022 10:48:49 +0800 Subject: [PATCH 073/207] dm cache: Fix UAF in destroy() commit 6a459d8edbdbe7b24db42a5a9f21e6aa9e00c2aa upstream. Dm_cache also has the same UAF problem when dm_resume() and dm_destroy() are concurrent. Therefore, cancelling timer again in destroy(). Cc: stable@vger.kernel.org Fixes: c6b4fcbad044e ("dm: add cache target") Signed-off-by: Luo Meng Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 54a8d5c9a44e..624a6335c832 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1887,6 +1887,7 @@ static void destroy(struct cache *cache) if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); + cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); From a7e060c3ea3442bd6a4e7d4b0dfeef3733ef2b9c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 30 Nov 2022 14:02:47 -0500 Subject: [PATCH 074/207] dm cache: set needs_check flag after aborting metadata commit 6b9973861cb2e96dcd0bb0f1baddc5c034207c5c upstream. 
Otherwise the commit that will be aborted will be associated with the metadata objects that will be torn down. Must write needs_check flag to metadata with a reset block manager. Found through code-inspection (and compared against dm-thin.c). Cc: stable@vger.kernel.org Fixes: 028ae9f76f29 ("dm cache: add fail io mode and needs_check flag") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 624a6335c832..5e92fac90b67 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -907,16 +907,16 @@ static void abort_transaction(struct cache *cache) if (get_cache_mode(cache) >= CM_READ_ONLY) return; - if (dm_cache_metadata_set_needs_check(cache->cmd)) { - DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); - set_cache_mode(cache, CM_FAIL); - } - DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); if (dm_cache_metadata_abort(cache->cmd)) { DMERR("%s: failed to abort metadata transaction", dev_name); set_cache_mode(cache, CM_FAIL); } + + if (dm_cache_metadata_set_needs_check(cache->cmd)) { + DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); + set_cache_mode(cache, CM_FAIL); + } } static void metadata_operation_failed(struct cache *cache, const char *op, int r) From 97e28deab8bfe70d5687650f94484f8f9101e566 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 3 Dec 2022 11:54:25 +0100 Subject: [PATCH 075/207] ata: ahci: fix enum constants for gcc-13 commit f07788079f515ca4a681c5f595bdad19cfbd7b1d upstream. 
gcc-13 slightly changes the type of constant expressions that are defined in an enum, which triggers a compile time sanity check in libata: linux/drivers/ata/libahci.c: In function 'ahci_led_store': linux/include/linux/compiler_types.h:357:45: error: call to '__compiletime_assert_302' declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long) 357 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) The new behavior is that sizeof() returns the same value for the constant as it does for the enum type, which is generally more sensible and consistent. The problem in libata is that it contains a single enum definition for lots of unrelated constants, some of which are large positive (unsigned) integers like 0xffffffff, while others like (1<<31) are interpreted as negative integers, and this forces the enum type to become 64 bit wide even though most constants would still fit into a signed 32-bit 'int'. Fix this by changing the entire enum definition to use BIT(x) in place of (1<<x), which results in all values being seen as 'unsigned' and fitting into an unsigned 32-bit type. Cc: linux-ide@vger.kernel.org Cc: Damien Le Moal Cc: stable@vger.kernel.org Cc: Randy Dunlap Signed-off-by: Arnd Bergmann Tested-by: Luis Machado Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.h | 241 +++++++++++++++++++++++---------------------- 1 file changed, 121 insertions(+), 120 deletions(-) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 7add8e79912b..ff8e6ae1c636 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -24,6 +24,7 @@ #include #include #include +#include /* Enclosure Management Control */ #define EM_CTRL_MSG_TYPE 0x000f0000 @@ -53,12 +54,12 @@ enum { AHCI_PORT_PRIV_FBS_DMA_SZ = AHCI_CMD_SLOT_SZ + AHCI_CMD_TBL_AR_SZ + (AHCI_RX_FIS_SZ * 16), - AHCI_IRQ_ON_SG = (1 << 31), - AHCI_CMD_ATAPI = (1 << 5), - AHCI_CMD_WRITE = (1 << 6), - AHCI_CMD_PREFETCH = (1 << 7), - AHCI_CMD_RESET = (1 << 8), - AHCI_CMD_CLR_BUSY = (1 << 10), + AHCI_IRQ_ON_SG = BIT(31), + AHCI_CMD_ATAPI = BIT(5), + 
AHCI_CMD_WRITE = BIT(6), + AHCI_CMD_PREFETCH = BIT(7), + AHCI_CMD_RESET = BIT(8), + AHCI_CMD_CLR_BUSY = BIT(10), RX_FIS_PIO_SETUP = 0x20, /* offset of PIO Setup FIS data */ RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */ @@ -76,37 +77,37 @@ enum { HOST_CAP2 = 0x24, /* host capabilities, extended */ /* HOST_CTL bits */ - HOST_RESET = (1 << 0), /* reset controller; self-clear */ - HOST_IRQ_EN = (1 << 1), /* global IRQ enable */ - HOST_MRSM = (1 << 2), /* MSI Revert to Single Message */ - HOST_AHCI_EN = (1 << 31), /* AHCI enabled */ + HOST_RESET = BIT(0), /* reset controller; self-clear */ + HOST_IRQ_EN = BIT(1), /* global IRQ enable */ + HOST_MRSM = BIT(2), /* MSI Revert to Single Message */ + HOST_AHCI_EN = BIT(31), /* AHCI enabled */ /* HOST_CAP bits */ - HOST_CAP_SXS = (1 << 5), /* Supports External SATA */ - HOST_CAP_EMS = (1 << 6), /* Enclosure Management support */ - HOST_CAP_CCC = (1 << 7), /* Command Completion Coalescing */ - HOST_CAP_PART = (1 << 13), /* Partial state capable */ - HOST_CAP_SSC = (1 << 14), /* Slumber state capable */ - HOST_CAP_PIO_MULTI = (1 << 15), /* PIO multiple DRQ support */ - HOST_CAP_FBS = (1 << 16), /* FIS-based switching support */ - HOST_CAP_PMP = (1 << 17), /* Port Multiplier support */ - HOST_CAP_ONLY = (1 << 18), /* Supports AHCI mode only */ - HOST_CAP_CLO = (1 << 24), /* Command List Override support */ - HOST_CAP_LED = (1 << 25), /* Supports activity LED */ - HOST_CAP_ALPM = (1 << 26), /* Aggressive Link PM support */ - HOST_CAP_SSS = (1 << 27), /* Staggered Spin-up */ - HOST_CAP_MPS = (1 << 28), /* Mechanical presence switch */ - HOST_CAP_SNTF = (1 << 29), /* SNotification register */ - HOST_CAP_NCQ = (1 << 30), /* Native Command Queueing */ - HOST_CAP_64 = (1 << 31), /* PCI DAC (64-bit DMA) support */ + HOST_CAP_SXS = BIT(5), /* Supports External SATA */ + HOST_CAP_EMS = BIT(6), /* Enclosure Management support */ + HOST_CAP_CCC = BIT(7), /* Command Completion Coalescing */ + HOST_CAP_PART = BIT(13), /* 
Partial state capable */ + HOST_CAP_SSC = BIT(14), /* Slumber state capable */ + HOST_CAP_PIO_MULTI = BIT(15), /* PIO multiple DRQ support */ + HOST_CAP_FBS = BIT(16), /* FIS-based switching support */ + HOST_CAP_PMP = BIT(17), /* Port Multiplier support */ + HOST_CAP_ONLY = BIT(18), /* Supports AHCI mode only */ + HOST_CAP_CLO = BIT(24), /* Command List Override support */ + HOST_CAP_LED = BIT(25), /* Supports activity LED */ + HOST_CAP_ALPM = BIT(26), /* Aggressive Link PM support */ + HOST_CAP_SSS = BIT(27), /* Staggered Spin-up */ + HOST_CAP_MPS = BIT(28), /* Mechanical presence switch */ + HOST_CAP_SNTF = BIT(29), /* SNotification register */ + HOST_CAP_NCQ = BIT(30), /* Native Command Queueing */ + HOST_CAP_64 = BIT(31), /* PCI DAC (64-bit DMA) support */ /* HOST_CAP2 bits */ - HOST_CAP2_BOH = (1 << 0), /* BIOS/OS handoff supported */ - HOST_CAP2_NVMHCI = (1 << 1), /* NVMHCI supported */ - HOST_CAP2_APST = (1 << 2), /* Automatic partial to slumber */ - HOST_CAP2_SDS = (1 << 3), /* Support device sleep */ - HOST_CAP2_SADM = (1 << 4), /* Support aggressive DevSlp */ - HOST_CAP2_DESO = (1 << 5), /* DevSlp from slumber only */ + HOST_CAP2_BOH = BIT(0), /* BIOS/OS handoff supported */ + HOST_CAP2_NVMHCI = BIT(1), /* NVMHCI supported */ + HOST_CAP2_APST = BIT(2), /* Automatic partial to slumber */ + HOST_CAP2_SDS = BIT(3), /* Support device sleep */ + HOST_CAP2_SADM = BIT(4), /* Support aggressive DevSlp */ + HOST_CAP2_DESO = BIT(5), /* DevSlp from slumber only */ /* registers for each SATA port */ PORT_LST_ADDR = 0x00, /* command list DMA addr */ @@ -128,24 +129,24 @@ enum { PORT_DEVSLP = 0x44, /* device sleep */ /* PORT_IRQ_{STAT,MASK} bits */ - PORT_IRQ_COLD_PRES = (1 << 31), /* cold presence detect */ - PORT_IRQ_TF_ERR = (1 << 30), /* task file error */ - PORT_IRQ_HBUS_ERR = (1 << 29), /* host bus fatal error */ - PORT_IRQ_HBUS_DATA_ERR = (1 << 28), /* host bus data error */ - PORT_IRQ_IF_ERR = (1 << 27), /* interface fatal error */ - PORT_IRQ_IF_NONFATAL = (1 
<< 26), /* interface non-fatal error */ - PORT_IRQ_OVERFLOW = (1 << 24), /* xfer exhausted available S/G */ - PORT_IRQ_BAD_PMP = (1 << 23), /* incorrect port multiplier */ + PORT_IRQ_COLD_PRES = BIT(31), /* cold presence detect */ + PORT_IRQ_TF_ERR = BIT(30), /* task file error */ + PORT_IRQ_HBUS_ERR = BIT(29), /* host bus fatal error */ + PORT_IRQ_HBUS_DATA_ERR = BIT(28), /* host bus data error */ + PORT_IRQ_IF_ERR = BIT(27), /* interface fatal error */ + PORT_IRQ_IF_NONFATAL = BIT(26), /* interface non-fatal error */ + PORT_IRQ_OVERFLOW = BIT(24), /* xfer exhausted available S/G */ + PORT_IRQ_BAD_PMP = BIT(23), /* incorrect port multiplier */ - PORT_IRQ_PHYRDY = (1 << 22), /* PhyRdy changed */ - PORT_IRQ_DMPS = (1 << 7), /* mechanical presence status */ - PORT_IRQ_CONNECT = (1 << 6), /* port connect change status */ - PORT_IRQ_SG_DONE = (1 << 5), /* descriptor processed */ - PORT_IRQ_UNK_FIS = (1 << 4), /* unknown FIS rx'd */ - PORT_IRQ_SDB_FIS = (1 << 3), /* Set Device Bits FIS rx'd */ - PORT_IRQ_DMAS_FIS = (1 << 2), /* DMA Setup FIS rx'd */ - PORT_IRQ_PIOS_FIS = (1 << 1), /* PIO Setup FIS rx'd */ - PORT_IRQ_D2H_REG_FIS = (1 << 0), /* D2H Register FIS rx'd */ + PORT_IRQ_PHYRDY = BIT(22), /* PhyRdy changed */ + PORT_IRQ_DMPS = BIT(7), /* mechanical presence status */ + PORT_IRQ_CONNECT = BIT(6), /* port connect change status */ + PORT_IRQ_SG_DONE = BIT(5), /* descriptor processed */ + PORT_IRQ_UNK_FIS = BIT(4), /* unknown FIS rx'd */ + PORT_IRQ_SDB_FIS = BIT(3), /* Set Device Bits FIS rx'd */ + PORT_IRQ_DMAS_FIS = BIT(2), /* DMA Setup FIS rx'd */ + PORT_IRQ_PIOS_FIS = BIT(1), /* PIO Setup FIS rx'd */ + PORT_IRQ_D2H_REG_FIS = BIT(0), /* D2H Register FIS rx'd */ PORT_IRQ_FREEZE = PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | @@ -161,27 +162,27 @@ enum { PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS, /* PORT_CMD bits */ - PORT_CMD_ASP = (1 << 27), /* Aggressive Slumber/Partial */ - PORT_CMD_ALPE = (1 << 26), /* Aggressive Link PM enable */ - PORT_CMD_ATAPI = (1 << 24), /* Device 
is ATAPI */ - PORT_CMD_FBSCP = (1 << 22), /* FBS Capable Port */ - PORT_CMD_ESP = (1 << 21), /* External Sata Port */ - PORT_CMD_CPD = (1 << 20), /* Cold Presence Detection */ - PORT_CMD_MPSP = (1 << 19), /* Mechanical Presence Switch */ - PORT_CMD_HPCP = (1 << 18), /* HotPlug Capable Port */ - PORT_CMD_PMP = (1 << 17), /* PMP attached */ - PORT_CMD_LIST_ON = (1 << 15), /* cmd list DMA engine running */ - PORT_CMD_FIS_ON = (1 << 14), /* FIS DMA engine running */ - PORT_CMD_FIS_RX = (1 << 4), /* Enable FIS receive DMA engine */ - PORT_CMD_CLO = (1 << 3), /* Command list override */ - PORT_CMD_POWER_ON = (1 << 2), /* Power up device */ - PORT_CMD_SPIN_UP = (1 << 1), /* Spin up device */ - PORT_CMD_START = (1 << 0), /* Enable port DMA engine */ + PORT_CMD_ASP = BIT(27), /* Aggressive Slumber/Partial */ + PORT_CMD_ALPE = BIT(26), /* Aggressive Link PM enable */ + PORT_CMD_ATAPI = BIT(24), /* Device is ATAPI */ + PORT_CMD_FBSCP = BIT(22), /* FBS Capable Port */ + PORT_CMD_ESP = BIT(21), /* External Sata Port */ + PORT_CMD_CPD = BIT(20), /* Cold Presence Detection */ + PORT_CMD_MPSP = BIT(19), /* Mechanical Presence Switch */ + PORT_CMD_HPCP = BIT(18), /* HotPlug Capable Port */ + PORT_CMD_PMP = BIT(17), /* PMP attached */ + PORT_CMD_LIST_ON = BIT(15), /* cmd list DMA engine running */ + PORT_CMD_FIS_ON = BIT(14), /* FIS DMA engine running */ + PORT_CMD_FIS_RX = BIT(4), /* Enable FIS receive DMA engine */ + PORT_CMD_CLO = BIT(3), /* Command list override */ + PORT_CMD_POWER_ON = BIT(2), /* Power up device */ + PORT_CMD_SPIN_UP = BIT(1), /* Spin up device */ + PORT_CMD_START = BIT(0), /* Enable port DMA engine */ - PORT_CMD_ICC_MASK = (0xf << 28), /* i/f ICC state mask */ - PORT_CMD_ICC_ACTIVE = (0x1 << 28), /* Put i/f in active state */ - PORT_CMD_ICC_PARTIAL = (0x2 << 28), /* Put i/f in partial state */ - PORT_CMD_ICC_SLUMBER = (0x6 << 28), /* Put i/f in slumber state */ + PORT_CMD_ICC_MASK = (0xfu << 28), /* i/f ICC state mask */ + PORT_CMD_ICC_ACTIVE = (0x1u << 28), 
/* Put i/f in active state */ + PORT_CMD_ICC_PARTIAL = (0x2u << 28), /* Put i/f in partial state */ + PORT_CMD_ICC_SLUMBER = (0x6u << 28), /* Put i/f in slumber state */ /* PORT_CMD capabilities mask */ PORT_CMD_CAP = PORT_CMD_HPCP | PORT_CMD_MPSP | @@ -192,9 +193,9 @@ enum { PORT_FBS_ADO_OFFSET = 12, /* FBS active dev optimization offset */ PORT_FBS_DEV_OFFSET = 8, /* FBS device to issue offset */ PORT_FBS_DEV_MASK = (0xf << PORT_FBS_DEV_OFFSET), /* FBS.DEV */ - PORT_FBS_SDE = (1 << 2), /* FBS single device error */ - PORT_FBS_DEC = (1 << 1), /* FBS device error clear */ - PORT_FBS_EN = (1 << 0), /* Enable FBS */ + PORT_FBS_SDE = BIT(2), /* FBS single device error */ + PORT_FBS_DEC = BIT(1), /* FBS device error clear */ + PORT_FBS_EN = BIT(0), /* Enable FBS */ /* PORT_DEVSLP bits */ PORT_DEVSLP_DM_OFFSET = 25, /* DITO multiplier offset */ @@ -202,50 +203,50 @@ enum { PORT_DEVSLP_DITO_OFFSET = 15, /* DITO offset */ PORT_DEVSLP_MDAT_OFFSET = 10, /* Minimum assertion time */ PORT_DEVSLP_DETO_OFFSET = 2, /* DevSlp exit timeout */ - PORT_DEVSLP_DSP = (1 << 1), /* DevSlp present */ - PORT_DEVSLP_ADSE = (1 << 0), /* Aggressive DevSlp enable */ + PORT_DEVSLP_DSP = BIT(1), /* DevSlp present */ + PORT_DEVSLP_ADSE = BIT(0), /* Aggressive DevSlp enable */ /* hpriv->flags bits */ #define AHCI_HFLAGS(flags) .private_data = (void *)(flags) - AHCI_HFLAG_NO_NCQ = (1 << 0), - AHCI_HFLAG_IGN_IRQ_IF_ERR = (1 << 1), /* ignore IRQ_IF_ERR */ - AHCI_HFLAG_IGN_SERR_INTERNAL = (1 << 2), /* ignore SERR_INTERNAL */ - AHCI_HFLAG_32BIT_ONLY = (1 << 3), /* force 32bit */ - AHCI_HFLAG_MV_PATA = (1 << 4), /* PATA port */ - AHCI_HFLAG_NO_MSI = (1 << 5), /* no PCI MSI */ - AHCI_HFLAG_NO_PMP = (1 << 6), /* no PMP */ - AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ - AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ - AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ - AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as - link offline */ - AHCI_HFLAG_NO_SNTF = (1 << 
12), /* no sntf */ - AHCI_HFLAG_NO_FPDMA_AA = (1 << 13), /* no FPDMA AA */ - AHCI_HFLAG_YES_FBS = (1 << 14), /* force FBS cap on */ - AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on - port start (wait until - error-handling stage) */ - AHCI_HFLAG_NO_DEVSLP = (1 << 17), /* no device sleep */ - AHCI_HFLAG_NO_FBS = (1 << 18), /* no FBS */ + AHCI_HFLAG_NO_NCQ = BIT(0), + AHCI_HFLAG_IGN_IRQ_IF_ERR = BIT(1), /* ignore IRQ_IF_ERR */ + AHCI_HFLAG_IGN_SERR_INTERNAL = BIT(2), /* ignore SERR_INTERNAL */ + AHCI_HFLAG_32BIT_ONLY = BIT(3), /* force 32bit */ + AHCI_HFLAG_MV_PATA = BIT(4), /* PATA port */ + AHCI_HFLAG_NO_MSI = BIT(5), /* no PCI MSI */ + AHCI_HFLAG_NO_PMP = BIT(6), /* no PMP */ + AHCI_HFLAG_SECT255 = BIT(8), /* max 255 sectors */ + AHCI_HFLAG_YES_NCQ = BIT(9), /* force NCQ cap on */ + AHCI_HFLAG_NO_SUSPEND = BIT(10), /* don't suspend */ + AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = BIT(11), /* treat SRST timeout as + link offline */ + AHCI_HFLAG_NO_SNTF = BIT(12), /* no sntf */ + AHCI_HFLAG_NO_FPDMA_AA = BIT(13), /* no FPDMA AA */ + AHCI_HFLAG_YES_FBS = BIT(14), /* force FBS cap on */ + AHCI_HFLAG_DELAY_ENGINE = BIT(15), /* do not start engine on + port start (wait until + error-handling stage) */ + AHCI_HFLAG_NO_DEVSLP = BIT(17), /* no device sleep */ + AHCI_HFLAG_NO_FBS = BIT(18), /* no FBS */ #ifdef CONFIG_PCI_MSI - AHCI_HFLAG_MULTI_MSI = (1 << 20), /* per-port MSI(-X) */ + AHCI_HFLAG_MULTI_MSI = BIT(20), /* per-port MSI(-X) */ #else /* compile out MSI infrastructure */ AHCI_HFLAG_MULTI_MSI = 0, #endif - AHCI_HFLAG_WAKE_BEFORE_STOP = (1 << 22), /* wake before DMA stop */ - AHCI_HFLAG_YES_ALPM = (1 << 23), /* force ALPM cap on */ - AHCI_HFLAG_NO_WRITE_TO_RO = (1 << 24), /* don't write to read - only registers */ - AHCI_HFLAG_USE_LPM_POLICY = (1 << 25), /* chipset that should use - SATA_MOBILE_LPM_POLICY - as default lpm_policy */ - AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during - suspend/resume */ - AHCI_HFLAG_NO_SXS = (1 << 28), /* SXS not 
supported */ + AHCI_HFLAG_WAKE_BEFORE_STOP = BIT(22), /* wake before DMA stop */ + AHCI_HFLAG_YES_ALPM = BIT(23), /* force ALPM cap on */ + AHCI_HFLAG_NO_WRITE_TO_RO = BIT(24), /* don't write to read + only registers */ + AHCI_HFLAG_USE_LPM_POLICY = BIT(25), /* chipset that should use + SATA_MOBILE_LPM_POLICY + as default lpm_policy */ + AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during + suspend/resume */ + AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ /* ap->flags bits */ @@ -261,22 +262,22 @@ enum { EM_MAX_RETRY = 5, /* em_ctl bits */ - EM_CTL_RST = (1 << 9), /* Reset */ - EM_CTL_TM = (1 << 8), /* Transmit Message */ - EM_CTL_MR = (1 << 0), /* Message Received */ - EM_CTL_ALHD = (1 << 26), /* Activity LED */ - EM_CTL_XMT = (1 << 25), /* Transmit Only */ - EM_CTL_SMB = (1 << 24), /* Single Message Buffer */ - EM_CTL_SGPIO = (1 << 19), /* SGPIO messages supported */ - EM_CTL_SES = (1 << 18), /* SES-2 messages supported */ - EM_CTL_SAFTE = (1 << 17), /* SAF-TE messages supported */ - EM_CTL_LED = (1 << 16), /* LED messages supported */ + EM_CTL_RST = BIT(9), /* Reset */ + EM_CTL_TM = BIT(8), /* Transmit Message */ + EM_CTL_MR = BIT(0), /* Message Received */ + EM_CTL_ALHD = BIT(26), /* Activity LED */ + EM_CTL_XMT = BIT(25), /* Transmit Only */ + EM_CTL_SMB = BIT(24), /* Single Message Buffer */ + EM_CTL_SGPIO = BIT(19), /* SGPIO messages supported */ + EM_CTL_SES = BIT(18), /* SES-2 messages supported */ + EM_CTL_SAFTE = BIT(17), /* SAF-TE messages supported */ + EM_CTL_LED = BIT(16), /* LED messages supported */ /* em message type */ - EM_MSG_TYPE_LED = (1 << 0), /* LED */ - EM_MSG_TYPE_SAFTE = (1 << 1), /* SAF-TE */ - EM_MSG_TYPE_SES2 = (1 << 2), /* SES-2 */ - EM_MSG_TYPE_SGPIO = (1 << 3), /* SGPIO */ + EM_MSG_TYPE_LED = BIT(0), /* LED */ + EM_MSG_TYPE_SAFTE = BIT(1), /* SAF-TE */ + EM_MSG_TYPE_SES2 = BIT(2), /* SES-2 */ + EM_MSG_TYPE_SGPIO = BIT(3), /* SGPIO */ }; struct ahci_cmd_hdr { From 1d408dabdba95aa9bccaf21766df406899a982f0 Mon Sep 17 
00:00:00 2001 From: Li Ming Date: Wed, 16 Nov 2022 09:56:37 +0800 Subject: [PATCH 076/207] PCI/DOE: Fix maximum data object length miscalculation commit a4ff8e7a71601321f7bf7b58ede664dc0d774274 upstream. Per PCIe r6.0, sec 6.30.1, a data object Length of 0x0 indicates 2^18 DWORDs (256K DW or 1MB) being transferred. Adjust the value of data object length for this case on both sending side and receiving side. Don't bother checking whether Length is greater than SZ_1M because all values of the 18-bit Length field are valid, and it is impossible to represent anything larger than SZ_1M: 0x00000 256K DW (1M bytes) 0x00001 1 DW (4 bytes) ... 0x3ffff 256K-1 DW (1M - 4 bytes) [bhelgaas: commit log] Link: https://lore.kernel.org/r/20221116015637.3299664-1-ming4.li@intel.com Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Signed-off-by: Li Ming Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/doe.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index e402f05068a5..66d9ab288646 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -29,6 +29,9 @@ #define PCI_DOE_FLAG_CANCEL 0 #define PCI_DOE_FLAG_DEAD 1 +/* Max data object length is 2^18 dwords */ +#define PCI_DOE_MAX_LENGTH (1 << 18) + /** * struct pci_doe_mb - State for a single DOE mailbox * @@ -107,6 +110,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, { struct pci_dev *pdev = doe_mb->pdev; int offset = doe_mb->cap_offset; + size_t length; u32 val; int i; @@ -123,15 +127,20 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; + /* Length is 2 DW of header + length of payload in DW */ + length = 2 + task->request_pl_sz / sizeof(u32); + if (length > PCI_DOE_MAX_LENGTH) + return -EIO; + if (length == PCI_DOE_MAX_LENGTH) + length = 0; + 
/* Write DOE Header */ val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) | FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type); pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val); - /* Length is 2 DW of header + length of payload in DW */ pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, - 2 + task->request_pl_sz / - sizeof(u32))); + length)); for (i = 0; i < task->request_pl_sz / sizeof(u32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, task->request_pl[i]); @@ -178,7 +187,10 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0); length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val); - if (length > SZ_1M || length < 2) + /* A value of 0x0 indicates max data object length */ + if (!length) + length = PCI_DOE_MAX_LENGTH; + if (length < 2) return -EIO; /* First 2 dwords have already been read */ From 04241956ce8825ff06e06e4083e7b692e9d5f712 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 11:51:43 +0800 Subject: [PATCH 077/207] tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx' commit 82470f7d9044842618c847a7166de2b7458157a7 upstream. When generate a synthetic event with many params and then create a trace action for it [1], kernel panic happened [2]. It is because that in trace_action_create() 'data->n_params' is up to SYNTH_FIELDS_MAX (current value is 64), and array 'data->var_ref_idx' keeps indices into array 'hist_data->var_refs' for each synthetic event param, but the length of 'data->var_ref_idx' is TRACING_MAP_VARS_MAX (current value is 16), so out-of-bound write happened when 'data->n_params' more than 16. In this case, 'data->match_data.event' is overwritten and eventually cause the panic. To solve the issue, adjust the length of 'data->var_ref_idx' to be SYNTH_FIELDS_MAX and add sanity checks to avoid out-of-bound write. 
[1] # cd /sys/kernel/tracing/ # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\ int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\ int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\ int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\ int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\ int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\ int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\ int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\ int v63" >> synthetic_events # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="bash"' >> \ events/sched/sched_waking/trigger # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\ pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\ pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\ pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\ pid,pid,pid,pid,pid,pid,pid,pid,pid)" >> events/sched/sched_switch/trigger [2] BUG: unable to handle page fault for address: ffff91c900000000 PGD 61001067 P4D 61001067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 322 Comm: bash Tainted: G W 6.1.0-rc8+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:strcmp+0xc/0x30 Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14 07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3 RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000 RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000 RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000 R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580 R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: 
ffff91c9479f0538 FS: 00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0 Call Trace: __find_event_file+0x55/0x90 action_create+0x76c/0x1060 event_hist_trigger_parse+0x146d/0x2060 ? event_trigger_write+0x31/0xd0 trigger_process_regex+0xbb/0x110 event_trigger_write+0x6b/0xd0 vfs_write+0xc8/0x3e0 ? alloc_fd+0xc0/0x160 ? preempt_count_add+0x4d/0xa0 ? preempt_count_add+0x70/0xa0 ksys_write+0x5f/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f1d1d0cf077 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 RSP: 002b:00007ffcebb0e568 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000143 RCX: 00007f1d1d0cf077 RDX: 0000000000000143 RSI: 00005639265aa7e0 RDI: 0000000000000001 RBP: 00005639265aa7e0 R08: 000000000000000a R09: 0000000000000142 R10: 000056392639c017 R11: 0000000000000246 R12: 0000000000000143 R13: 00007f1d1d1ae6a0 R14: 00007f1d1d1aa4a0 R15: 00007f1d1d1a98a0 Modules linked in: CR2: ffff91c900000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strcmp+0xc/0x30 Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14 07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3 RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000 RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000 RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000 R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580 R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538 FS: 00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 
CR0: 0000000080050033 CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0 Link: https://lore.kernel.org/linux-trace-kernel/20221207035143.2278781-1-zhengyejian1@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: d380dcde9a07 ("tracing: Fix now invalid var_ref_vals assumption in trace action") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index b6e5724a9ea3..b97412053c0a 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -617,7 +617,7 @@ struct action_data { * event param, and is passed to the synthetic event * invocation. */ - unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; + unsigned int var_ref_idx[SYNTH_FIELDS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -2173,7 +2173,9 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data, return ref_field; } } - + /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */ + if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX) + return NULL; ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); if (ref_field) { if (init_var_ref(ref_field, var_field, system, event_name)) { @@ -3922,6 +3924,10 @@ static int trace_action_create(struct hist_trigger_data *hist_data, lockdep_assert_held(&event_mutex); + /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */ + if (data->n_params > SYNTH_FIELDS_MAX) + return -EINVAL; + if (data->use_trace_keyword) synth_event_name = data->synth_event_name; else From 9dd6b35e2bcd2c64b2a830aa8bda0a0ff6c58705 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Dec 2022 14:31:40 -0800 Subject: [PATCH 078/207] perf/core: Call LSM hook after copying perf_event_attr commit 
0a041ebca4956292cadfb14a63ace3a9c1dcb0a3 upstream. It passes the attr struct to the security_perf_event_open() but it's not initialized yet. Fixes: da97e18458fb ("perf_event: Add support for LSM and SELinux checks") Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Joel Fernandes (Google) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221220223140.4020470-1-namhyung@kernel.org Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 732b392fc5c6..3b9e86108f43 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12231,12 +12231,12 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; - /* Do we allow access to perf_event_open(2) ? */ - err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; - err = perf_copy_attr(attr_uptr, &attr); + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) return err; From aa3e7a48e921aeb6a0e01c3a474db50de6c9bcac Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 5 Dec 2022 13:19:21 -0800 Subject: [PATCH 079/207] xtensa: add __umulsidi3 helper commit 8939c58d68f97ce530f02d46c9f2b56c3ec88399 upstream. xtensa gcc-13 has changed multiplication handling and may now use __umulsidi3 helper where it used to use __muldi3. As a result building the kernel with the new gcc may fail with the following error: linux/init/main.c:1287: undefined reference to `__umulsidi3' Fix the build by providing __umulsidi3 implementation for xtensa. 
Cc: stable@vger.kernel.org # 5.18+ Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/xtensa_ksyms.c | 2 + arch/xtensa/lib/Makefile | 2 +- arch/xtensa/lib/umulsidi3.S | 230 ++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 arch/xtensa/lib/umulsidi3.S diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index b0bc8897c924..2a31b1ab0c9f 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -62,6 +62,7 @@ extern int __modsi3(int, int); extern int __mulsi3(int, int); extern unsigned int __udivsi3(unsigned int, unsigned int); extern unsigned int __umodsi3(unsigned int, unsigned int); +extern unsigned long long __umulsidi3(unsigned int, unsigned int); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); @@ -71,6 +72,7 @@ EXPORT_SYMBOL(__modsi3); EXPORT_SYMBOL(__mulsi3); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); +EXPORT_SYMBOL(__umulsidi3); unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile index d4e9c397e3fd..7ecef0519a27 100644 --- a/arch/xtensa/lib/Makefile +++ b/arch/xtensa/lib/Makefile @@ -5,7 +5,7 @@ lib-y += memcopy.o memset.o checksum.o \ ashldi3.o ashrdi3.o lshrdi3.o \ - divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \ + divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o umulsidi3.o \ usercopy.o strncpy_user.o strnlen_user.o lib-$(CONFIG_PCI) += pci-auto.o lib-$(CONFIG_KCSAN) += kcsan-stubs.o diff --git a/arch/xtensa/lib/umulsidi3.S b/arch/xtensa/lib/umulsidi3.S new file mode 100644 index 000000000000..136081647942 --- /dev/null +++ b/arch/xtensa/lib/umulsidi3.S @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include +#include +#include + +#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 +#define XCHAL_NO_MUL 1 +#endif + +ENTRY(__umulsidi3) + +#ifdef 
__XTENSA_CALL0_ABI__ + abi_entry(32) + s32i a12, sp, 16 + s32i a13, sp, 20 + s32i a14, sp, 24 + s32i a15, sp, 28 +#elif XCHAL_NO_MUL + /* This is not really a leaf function; allocate enough stack space + to allow CALL12s to a helper function. */ + abi_entry(32) +#else + abi_entry_default +#endif + +#ifdef __XTENSA_EB__ +#define wh a2 +#define wl a3 +#else +#define wh a3 +#define wl a2 +#endif /* __XTENSA_EB__ */ + + /* This code is taken from the mulsf3 routine in ieee754-sf.S. + See more comments there. */ + +#if XCHAL_HAVE_MUL32_HIGH + mull a6, a2, a3 + muluh wh, a2, a3 + mov wl, a6 + +#else /* ! MUL32_HIGH */ + +#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL + /* a0 and a8 will be clobbered by calling the multiply function + but a8 is not used here and need not be saved. */ + s32i a0, sp, 0 +#endif + +#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 + +#define a2h a4 +#define a3h a5 + + /* Get the high halves of the inputs into registers. */ + srli a2h, a2, 16 + srli a3h, a3, 16 + +#define a2l a2 +#define a3l a3 + +#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 + /* Clear the high halves of the inputs. This does not matter + for MUL16 because the high bits are ignored. */ + extui a2, a2, 0, 16 + extui a3, a3, 0, 16 +#endif +#endif /* MUL16 || MUL32 */ + + +#if XCHAL_HAVE_MUL16 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mul16u dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MUL32 + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + mull dst, xreg ## xhalf, yreg ## yhalf + +#elif XCHAL_HAVE_MAC16 + +/* The preprocessor insists on inserting a space when concatenating after + a period in the definition of do_mul below. These macros are a workaround + using underscores instead of periods when doing the concatenation. 
*/ +#define umul_aa_ll umul.aa.ll +#define umul_aa_lh umul.aa.lh +#define umul_aa_hl umul.aa.hl +#define umul_aa_hh umul.aa.hh + +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + umul_aa_ ## xhalf ## yhalf xreg, yreg; \ + rsr dst, ACCLO + +#else /* no multiply hardware */ + +#define set_arg_l(dst, src) \ + extui dst, src, 0, 16 +#define set_arg_h(dst, src) \ + srli dst, src, 16 + +#ifdef __XTENSA_CALL0_ABI__ +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a13, xreg); \ + set_arg_ ## yhalf (a14, yreg); \ + call0 .Lmul_mulsi3; \ + mov dst, a12 +#else +#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ + set_arg_ ## xhalf (a14, xreg); \ + set_arg_ ## yhalf (a15, yreg); \ + call12 .Lmul_mulsi3; \ + mov dst, a14 +#endif /* __XTENSA_CALL0_ABI__ */ + +#endif /* no multiply hardware */ + + /* Add pp1 and pp2 into a6 with carry-out in a9. */ + do_mul(a6, a2, l, a3, h) /* pp 1 */ + do_mul(a11, a2, h, a3, l) /* pp 2 */ + movi a9, 0 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Shift the high half of a9/a6 into position in a9. Note that + this value can be safely incremented without any carry-outs. */ + ssai 16 + src a9, a9, a6 + + /* Compute the low word into a6. */ + do_mul(a11, a2, l, a3, l) /* pp 0 */ + sll a6, a6 + add a6, a6, a11 + bgeu a6, a11, 1f + addi a9, a9, 1 +1: + /* Compute the high word into wh. */ + do_mul(wh, a2, h, a3, h) /* pp 3 */ + add wh, wh, a9 + mov wl, a6 + +#endif /* !MUL32_HIGH */ + +#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL + /* Restore the original return address. 
*/ + l32i a0, sp, 0 +#endif +#ifdef __XTENSA_CALL0_ABI__ + l32i a12, sp, 16 + l32i a13, sp, 20 + l32i a14, sp, 24 + l32i a15, sp, 28 + abi_ret(32) +#else + abi_ret_default +#endif + +#if XCHAL_NO_MUL + + .macro do_addx2 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx2 \dst, \as, \at +#else + slli \tmp, \as, 1 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx4 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx4 \dst, \as, \at +#else + slli \tmp, \as, 2 + add \dst, \tmp, \at +#endif + .endm + + .macro do_addx8 dst, as, at, tmp +#if XCHAL_HAVE_ADDX + addx8 \dst, \as, \at +#else + slli \tmp, \as, 3 + add \dst, \tmp, \at +#endif + .endm + + /* For Xtensa processors with no multiply hardware, this simplified + version of _mulsi3 is used for multiplying 16-bit chunks of + the floating-point mantissas. When using CALL0, this function + uses a custom ABI: the inputs are passed in a13 and a14, the + result is returned in a12, and a8 and a15 are clobbered. */ + .align 4 +.Lmul_mulsi3: + abi_entry_default + + .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 + movi \dst, 0 +1: add \tmp1, \src2, \dst + extui \tmp2, \src1, 0, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx2 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 1, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx4 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 2, 1 + movnez \dst, \tmp1, \tmp2 + + do_addx8 \tmp1, \src2, \dst, \tmp1 + extui \tmp2, \src1, 3, 1 + movnez \dst, \tmp1, \tmp2 + + srli \src1, \src1, 4 + slli \src2, \src2, 4 + bnez \src1, 1b + .endm + +#ifdef __XTENSA_CALL0_ABI__ + mul_mulsi3_body a12, a13, a14, a15, a8 +#else + /* The result will be written into a2, so save that argument in a4. 
*/ + mov a4, a2 + mul_mulsi3_body a2, a4, a3, a5, a6 +#endif + abi_ret_default +#endif /* XCHAL_NO_MUL */ + +ENDPROC(__umulsidi3) From 8a6cd16af71ef533f11d72e06d4c934af7fee891 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 28 Nov 2022 14:24:39 -0600 Subject: [PATCH 080/207] of/kexec: Fix reading 32-bit "linux,initrd-{start,end}" values commit e553ad8d7957697385e81034bf76db3b2cb2cf27 upstream. "linux,initrd-start" and "linux,initrd-end" can be 32-bit values even on a 64-bit platform. Ideally, the size should be based on '#address-cells', but that has never been enforced in the kernel's FDT boot parsing code (early_init_dt_check_for_initrd()). Bootloader behavior is known to vary. For example, kexec always writes these as 64-bit. The result of incorrectly reading 32-bit values is most likely the reserved memory for the original initrd will still be reserved for the new kernel. The original arm64 equivalent of this code failed to release the initrd reserved memory in *all* cases. Use of_read_number() to mirror the early_init_dt_check_for_initrd() code. Fixes: b30be4dc733e ("of: Add a common kexec FDT setup function") Cc: stable@vger.kernel.org Reported-by: Peter Maydell Link: https://lore.kernel.org/r/20221128202440.1411895-1-robh@kernel.org Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/kexec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index e6c01db393f9..f26d2ba8a371 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -281,7 +281,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, const char *cmdline, size_t extra_fdt_size) { void *fdt; - int ret, chosen_node; + int ret, chosen_node, len; const void *prop; size_t fdt_size; @@ -324,19 +324,19 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, goto out; /* Did we boot using an initrd? 
*/ - prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", &len); if (prop) { u64 tmp_start, tmp_end, tmp_size; - tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); + tmp_start = of_read_number(prop, len / 4); - prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", &len); if (!prop) { ret = -EINVAL; goto out; } - tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); + tmp_end = of_read_number(prop, len / 4); /* * kexec reserves exact initrd size, while firmware may From 2d627fbb50267e89ad4070acdfef1c7845c33280 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 25 Nov 2022 20:18:40 +0800 Subject: [PATCH 081/207] ima: Fix hash dependency to correct algorithm commit b6018af440a07bd0d74b58c4e18045f4a8dbfe6b upstream. Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moves the SM3 and SM4 stand-alone library and the algorithm implementation for the Crypto API into the same directory, and the corresponding relationship of Kconfig is modified, CONFIG_CRYPTO_SM3/4 corresponds to the stand-alone library of SM3/4, and CONFIG_CRYPTO_SM3/4_GENERIC corresponds to the algorithm implementation for the Crypto API. Therefore, it is necessary for this module to depend on the correct algorithm. Fixes: d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") Cc: Jason A. 
Donenfeld Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Tianjia Zhang Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 7249f16257c7..39caeca47444 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -112,7 +112,7 @@ choice config IMA_DEFAULT_HASH_SM3 bool "SM3" - depends on CRYPTO_SM3=y + depends on CRYPTO_SM3_GENERIC=y endchoice config IMA_DEFAULT_HASH From c877c99ee5c0ce20d6eca98fc8c6925bed5359b7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 30 Sep 2022 23:31:32 +0000 Subject: [PATCH 082/207] KVM: VMX: Resume guest immediately when injecting #GP on ECREATE commit eb3992e833d3a17f9b0a3e0371d0b1d3d566f740 upstream. Resume the guest immediately when injecting a #GP on ECREATE due to an invalid enclave size, i.e. don't attempt ECREATE in the host. The #GP is a terminal fault, e.g. skipping the instruction if ECREATE is successful would result in KVM injecting #GP on the instruction following ECREATE. Fixes: 70210c044b4e ("KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions") Cc: stable@vger.kernel.org Cc: Kai Huang Signed-off-by: Sean Christopherson Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20220930233132.1723330-1-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/sgx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 8f95c7c01433..b12da2a6dec9 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -182,8 +182,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, /* Enforce CPUID restriction on max enclave size. */ max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? 
sgx_12_0->edx >> 8 : sgx_12_0->edx; - if (size >= BIT_ULL(max_size_log2)) + if (size >= BIT_ULL(max_size_log2)) { kvm_inject_gp(vcpu, 0); + return 1; + } /* * sgx_virt_ecreate() returns: From 04066fcbf18eaa8747b8e7560d318b669e676503 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:19:56 +0000 Subject: [PATCH 083/207] KVM: nVMX: Inject #GP, not #UD, if "generic" VMXON CR0/CR4 check fails commit 9cc409325ddd776f6fd6293d5ce93ce1248af6e4 upstream. Inject #GP if VMXON is attempted with a CR0/CR4 that fails the generic "is CRx valid" check, but passes the CR4.VMXE check, and do the generic checks _after_ handling the post-VMXON VM-Fail. The CR4.VMXE check, and all other #UD cases, are special pre-conditions that are enforced prior to pivoting on the current VMX mode, i.e. occur before interception if VMXON is attempted in VMX non-root mode. All other CR0/CR4 checks generate #GP and effectively have lower priority than the post-VMXON check. Per the SDM: IF (register operand) or (CR0.PE = 0) or (CR4.VMXE = 0) or ... THEN #UD; ELSIF not in VMX operation THEN IF (CPL > 0) or (in A20M mode) or (the values of CR0 and CR4 are not supported in VMX operation) THEN #GP(0); ELSIF in VMX non-root operation THEN VMexit; ELSIF CPL > 0 THEN #GP(0); ELSE VMfail("VMXON executed in VMX root operation"); FI; which, if re-written without ELSIF, yields: IF (register operand) or (CR0.PE = 0) or (CR4.VMXE = 0) or ... THEN #UD IF in VMX non-root operation THEN VMexit; IF CPL > 0 THEN #GP(0) IF in VMX operation THEN VMfail("VMXON executed in VMX root operation"); IF (in A20M mode) or (the values of CR0 and CR4 are not supported in VMX operation) THEN #GP(0); Note, KVM unconditionally forwards VMXON VM-Exits that occur in L2 to L1, i.e. there is no need to check the vCPU is not in VMX non-root mode. Add a comment to explain why unconditionally forwarding such exits is functionally correct.
Reported-by: Eric Li Fixes: c7d855c2aff2 ("KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Link: https://lore.kernel.org/r/20221006001956.329314-1-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 44 +++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5b0d4859e4b7..3539ca650fb0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5100,24 +5100,35 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* - * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks - * that have higher priority than VM-Exit (see Intel SDM's pseudocode - * for VMXON), as KVM must load valid CR0/CR4 values into hardware while - * running the guest, i.e. KVM needs to check the _guest_ values. + * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter + * the guest and so cannot rely on hardware to perform the check, + * which has higher priority than VM-Exit (see Intel SDM's pseudocode + * for VMXON). * - * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and - * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real - * Mode, but KVM will never take the guest out of those modes. + * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 + * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't + * force any of the relevant guest state. For a restricted guest, KVM + * does force CR0.PE=1, but only to also force VM86 in order to emulate + * Real Mode, and so there's no need to check CR0.PE manually. 
*/ - if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || - !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } /* - * CPL=0 and all other checks that are lower priority than VM-Exit must - * be checked manually. + * The CPL is checked for "not in VMX operation" and for "in VMX root", + * and has higher priority than the VM-Fail due to being post-VMXON, + * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root, + * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits + * from L2 to L1, i.e. there's no need to check for the vCPU being in + * VMX non-root. + * + * Forwarding the VM-Exit unconditionally, i.e. without performing the + * #UD checks (see above), is functionally ok because KVM doesn't allow + * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's + * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are + * missed by hardware due to shadowing CR0 and/or CR4. */ if (vmx_get_cpl(vcpu)) { kvm_inject_gp(vcpu, 0); @@ -5127,6 +5138,17 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) if (vmx->nested.vmxon) return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); + /* + * Invalid CR0/CR4 generates #GP. These checks are performed if and + * only if the vCPU isn't already in VMX operation, i.e. effectively + * have lower priority than the VM-Fail above. + */ + if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || + !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) != VMXON_NEEDED_FEATURES) { kvm_inject_gp(vcpu, 0); From 59cc9627be2577d1701a2d1248c38c6ca9e46323 Mon Sep 17 00:00:00 2001 From: Yuan ZhaoXiong Date: Fri, 2 Dec 2022 20:36:14 +0800 Subject: [PATCH 084/207] KVM: x86: fix APICv/x2AVIC disabled when vm reboot by itself commit ef40757743b47cc95de9b4ed41525c94f8dc73d9 upstream. 
When a VM reboots itself, the reset process will result in an ioctl(KVM_SET_LAPIC, ...) to disable x2APIC mode and set the xAPIC id of the vCPU to its default value, which is the vCPU id. That will be handled in KVM as follows: kvm_vcpu_ioctl_set_lapic kvm_apic_set_state kvm_lapic_set_base => disable X2APIC mode kvm_apic_state_fixup kvm_lapic_xapic_id_updated kvm_xapic_id(apic) != apic->vcpu->vcpu_id kvm_set_apicv_inhibit(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)) => update APIC_ID When kvm_apic_set_state invokes kvm_lapic_set_base to disable x2APIC mode, the old 32-bit x2APIC id is still present rather than the 8-bit xAPIC id. kvm_lapic_xapic_id_updated will set the APICV_INHIBIT_REASON_APIC_ID_MODIFIED bit and disable APICv/x2AVIC. Instead, kvm_lapic_xapic_id_updated must be called after APIC_ID is changed. In fact, this fixes another small issue in the code in that potential changes to a vCPU's xAPIC ID need not be tracked for KVM_GET_LAPIC. 
Fixes: 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base") Signed-off-by: Yuan ZhaoXiong Message-Id: <1669984574-32692-1-git-send-email-yuanzhaoxiong@baidu.com> Cc: stable@vger.kernel.org Reported-by: Alejandro Jimenez Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d7639d126e6c..bf5ce862c4da 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2722,8 +2722,6 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } - } else { - kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; @@ -2759,6 +2757,9 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + if (!apic_x2apic_mode(apic)) + kvm_lapic_xapic_id_updated(apic); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); From 891a644b0cbdef21ffa3caecfc8e4f4a543dcdfa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 06:23:03 +0000 Subject: [PATCH 085/207] KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1 commit 31de69f4eea77b28a9724b3fa55aae104fc91fc7 upstream. Set ENABLE_USR_WAIT_PAUSE in KVM's supported VMX MSR configuration if the feature is supported in hardware and enabled in KVM's base, non-nested configuration, i.e. expose ENABLE_USR_WAIT_PAUSE to L1 if it's supported. This fixes a bug where saving/restoring, i.e. migrating, a vCPU will fail if WAITPKG (the associated CPUID feature) is enabled for the vCPU, and obviously allows L1 to enable the feature for L2. 
KVM already effectively exposes ENABLE_USR_WAIT_PAUSE to L1 by stuffing the allowed-1 control in a vCPU's virtual MSR_IA32_VMX_PROCBASED_CTLS2 when updating secondary controls in response to KVM_SET_CPUID(2), but (a) that depends on flawed code (KVM shouldn't touch VMX MSRs in response to CPUID updates) and (b) runs afoul of vmx_restore_control_msr()'s restriction that the guest value must be a strict subset of the supported host value. Although no past commit explicitly enabled nested support for WAITPKG, doing so is safe and functionally correct from an architectural perspective as no additional KVM support is needed to virtualize TPAUSE, UMONITOR, and UMWAIT for L2 relative to L1, and KVM already forwards VM-Exits to L1 as necessary (commit bf653b78f960, "KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit"). Note, KVM always keeps the host's MSR_IA32_UMWAIT_CONTROL resident in hardware, i.e. always runs both L1 and L2 with the host's power management settings for TPAUSE and UMWAIT. See commit bf09fb6cba4f ("KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL") for more details.
Fixes: e69e72faa3a0 ("KVM: x86: Add support for user wait instructions") Cc: stable@vger.kernel.org Reported-by: Aaron Lewis Reported-by: Yu Zhang Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Message-Id: <20221213062306.667649-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 3539ca650fb0..10c63b1bf92f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6830,7 +6830,8 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDSEED_EXITING | SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_TSC_SCALING; + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; /* * We can emulate "VMCS shadowing," even if the hardware From db10ca17bbe2294452995b863100078a55bc6229 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 29 Nov 2022 13:08:27 -0800 Subject: [PATCH 086/207] x86/microcode/intel: Do not retry microcode reloading on the APs commit be1b670f61443aa5d0d01782e9b8ea0ee825d018 upstream. The retries in load_ucode_intel_ap() were in place to support systems with mixed steppings. Mixed steppings are no longer supported and there is only one microcode image at a time. Any retries will simply reattempt to apply the same image over and over without making progress. [ bp: Zap the circumstantial reasoning from the commit message. 
] Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading") Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221129210832.107850-3-ashok.raj@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 1fcbd671f1df..048e38ec99e7 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -621,7 +621,6 @@ void load_ucode_intel_ap(void) else iup = &intel_ucode_patch; -reget: if (!*iup) { patch = __load_ucode_intel(&uci); if (!patch) @@ -632,12 +631,7 @@ reget: uci.mc = *iup; - if (apply_microcode_early(&uci, true)) { - /* Mixed-silicon system? Try to refetch the proper patch: */ - *iup = NULL; - - goto reget; - } + apply_microcode_early(&uci, true); } static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) From 3dabe6c5f3e9444840033264671dce2e6d82ce88 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 9 Dec 2022 10:52:47 -0500 Subject: [PATCH 087/207] ftrace/x86: Add back ftrace_expected for ftrace bug reports commit fd3dc56253acbe9c641a66d312d8393cd55eb04c upstream. After someone reported a bug report with a failed modification due to the expected value not matching what was found, it came to my attention that the ftrace_expected is no longer set when that happens. This makes for debugging the issue a bit more difficult. Set ftrace_expected to the expected code before calling ftrace_bug, so that it shows what was expected and why it failed. 
Link: https://lore.kernel.org/all/CA+wXwBQ-VhK+hpBtYtyZP-NiX4g8fqRRWithFOHQW-0coQ3vLg@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20221209105247.01d4e51d@gandalf.local.home Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: "x86@kernel.org" Cc: Borislav Petkov Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bd165004776d..e07234ec7e23 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -217,7 +217,9 @@ void ftrace_replace_code(int enable) ret = ftrace_verify_code(rec->ip, old); if (ret) { + ftrace_expected = old; ftrace_bug(ret, rec); + ftrace_expected = NULL; return; } } From 85932e3882e2f8c349cf4a9e5b6d027d5a1ab18d Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 19 Dec 2022 23:35:10 +0900 Subject: [PATCH 088/207] x86/kprobes: Fix kprobes instruction boudary check with CONFIG_RETHUNK commit 1993bf97992df2d560287f3c4120eda57426843d upstream. Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping speculative execution after RET instruction, kprobes always failes to check the probed instruction boundary by decoding the function body if the probed address is after such sequence. (Note that some conditional code blocks will be placed after function return, if compiler decides it is not on the hot path.) This is because kprobes expects kgdb puts the INT3 as a software breakpoint and it will replace the original instruction. But these INT3 are not such purpose, it doesn't need to recover the original instruction. To avoid this issue, kprobes checks whether the INT3 is owned by kgdb or not, and if so, stop decoding and make it fail. 
The other INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be treated as a one-byte instruction. Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/167146051026.1374301.392728975473572291.stgit@devnote3 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb8bc82846b9..5be7f23099e1 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -281,12 +282,15 @@ static int can_probe(unsigned long paddr) if (ret < 0) return 0; +#ifdef CONFIG_KGDB /* - * Another debugging subsystem might insert this breakpoint. - * In that case, we can't recover it. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) return 0; +#endif addr += insn.length; } From c9449d762f0eebb4021137cb185e914b62d6c5d1 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 19 Dec 2022 23:35:19 +0900 Subject: [PATCH 089/207] x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK commit 63dc6325ff41ee9e570bde705ac34a39c5dbeb44 upstream. Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping speculative execution after function return, kprobe jump optimization always fails on the functions with such INT3 inside the function body. 
(It already checks the INT3 padding between functions, but not inside the function) To avoid this issue, as same as kprobes, check whether the INT3 comes from kgdb or not, and if so, stop decoding and make it fail. The other INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be treated as a one-byte instruction. Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/167146051929.1374301.7419382929328081706.stgit@devnote3 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/opt.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index e6b8c5362b94..e57e07b0edb6 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -279,19 +280,6 @@ static int insn_is_indirect_jump(struct insn *insn) return ret; } -static bool is_padding_int3(unsigned long addr, unsigned long eaddr) -{ - unsigned char ops; - - for (; addr < eaddr; addr++) { - if (get_kernel_nofault(ops, (void *)addr) < 0 || - ops != INT3_INSN_OPCODE) - return false; - } - - return true; -} - /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { @@ -334,15 +322,15 @@ static int can_optimize(unsigned long paddr) ret = insn_decode_kernel(&insn, (void *)recovered_insn); if (ret < 0) return 0; - +#ifdef CONFIG_KGDB /* - * In the case of detecting unknown breakpoint, this could be - * a padding INT3 between functions. Let's check that all the - * rest of the bytes are also INT3. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. 
 */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) - return is_padding_int3(addr, paddr - offset + size) ? 1 : 0; - + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) + return 0; +#endif /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); From 43f7cd89b584078459a82955260d03eee60a72fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 17 Nov 2022 21:42:49 -0500 Subject: [PATCH 090/207] tracing: Fix race where eprobes can be called before the event commit d5f30a7da8ea8e6450250275cec5670cee3c4264 upstream. The flag that tells the event to call its triggers after reading the event is set for eprobes after the eprobe is enabled. This leads to a race where the eprobe may be triggered at the beginning of the event where the record information is NULL. The eprobe then dereferences the NULL record causing a NULL kernel pointer bug. Test for a NULL record to keep this from happening. Link: https://lore.kernel.org/linux-trace-kernel/20221116192552.1066630-1-rafaelmendsr@gmail.com/ Link: https://lore.kernel.org/all/20221117214249.2addbe10@gandalf.local.home/ Cc: stable@vger.kernel.org Fixes: 7491e2c442781 ("tracing: Add a probe that attaches to trace events") Reported-by: Rafael Mendonca Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_eprobe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 352b65e2b910..753fc536525d 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -564,6 +564,9 @@ static void eprobe_trigger_func(struct event_trigger_data *data, { struct eprobe_data *edata = data->private_data; + if (unlikely(!rec)) + return; + __eprobe_trace_func(edata, rec); } From 938791ad58a3d557dfd2f1eed984618f48849f46 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Thu, 1 Dec 2022
11:14:42 -0500 Subject: [PATCH 091/207] powerpc/ftrace: fix syscall tracing on PPC64_ELF_ABI_V1 commit ad050d2390fccb22aa3e6f65e11757ce7a5a7ca5 upstream. In v5.7 the powerpc syscall entry/exit logic was rewritten in C, on PPC64_ELF_ABI_V1 this resulted in the symbols in the syscall table changing from their dot prefixed variant to the non-prefixed ones. Since ftrace prefixes a dot to the syscall names when matching them to build its syscall event list, this resulted in no syscall events being available. Remove the PPC64_ELF_ABI_V1 specific version of arch_syscall_match_sym_name to have the same behavior across all powerpc variants. Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Michael Jeanson Reviewed-by: Mathieu Desnoyers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201161442.2127231-1-mjeanson@efficios.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/ftrace.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 3cee7115441b..e3d1f377bc5b 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -64,17 +64,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, * those. 
 */ #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME -#ifdef CONFIG_PPC64_ELF_ABI_V1 -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) -{ - /* We need to skip past the initial dot, and the __se_sys alias */ - return !strcmp(sym + 1, name) || - (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) || - (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) || - (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) || - (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4)); -} -#else static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { return !strcmp(sym, name) || @@ -83,7 +72,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) || (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4)); } -#endif /* CONFIG_PPC64_ELF_ABI_V1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ #if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) From cb8dce806598d1818ee371fa51696164559657ab Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 6 Dec 2022 23:18:01 +0900 Subject: [PATCH 092/207] tracing: Fix complicated dependency of CONFIG_TRACER_MAX_TRACE commit e25e43a4e5d8cb2323553d8b6a7ba08d2ebab21f upstream. Both CONFIG_OSNOISE_TRACER and CONFIG_HWLAT_TRACER partially enable the CONFIG_TRACER_MAX_TRACE code, but that is complicated and has introduced a bug: the tracing_max_lat_fops data structure is declared outside of the #ifdefs, but since it is defined only when CONFIG_TRACER_MAX_TRACE=y or CONFIG_HWLAT_TRACER=y, if only CONFIG_OSNOISE_TRACER=y, that declaration becomes a definition(!). To fix this issue, and to avoid repeating a similar problem, make CONFIG_OSNOISE_TRACER and CONFIG_HWLAT_TRACER always enable CONFIG_TRACER_MAX_TRACE. This has three benefits: - Fix the tracing_max_lat_fops bug - Simplify the #ifdefs - CONFIG_TRACER_MAX_TRACE code is fully enabled, or not.
Link: https://lore.kernel.org/linux-trace-kernel/167033628155.4111793.12185405690820208159.stgit@devnote3 Fixes: 424b650f35c7 ("tracing: Fix missing osnoise tracer on max_latency") Cc: Daniel Bristot de Oliveira Cc: stable@vger.kernel.org Reported-by: David Howells Reported-by: kernel test robot Signed-off-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/all/166992525941.1716618.13740663757583361463.stgit@warthog.procyon.org.uk/ (original thread and v1) Link: https://lore.kernel.org/all/202212052253.VuhZ2ulJ-lkp@intel.com/T/#u (v1 error report) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/Kconfig | 2 ++ kernel/trace/trace.c | 23 +++++++++++++---------- kernel/trace/trace.h | 8 +++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e9e95c790b8e..93d724996283 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -375,6 +375,7 @@ config SCHED_TRACER config HWLAT_TRACER bool "Tracer to detect hardware latencies (like SMIs)" select GENERIC_TRACER + select TRACER_MAX_TRACE help This tracer, when enabled will create one or more kernel threads, depending on what the cpumask file is set to, which each thread @@ -410,6 +411,7 @@ config HWLAT_TRACER config OSNOISE_TRACER bool "OS Noise tracer" select GENERIC_TRACER + select TRACER_MAX_TRACE help In the context of high-performance computing (HPC), the Operating System Noise (osnoise) refers to the interference experienced by an diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5cfc95a52bc3..459ff8cf768a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1421,6 +1421,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) return false; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); +#define free_snapshot(tr) do { } while (0) #endif /* CONFIG_TRACER_SNAPSHOT */ void tracer_tracing_off(struct trace_array *tr) @@ -1692,6 +1693,8 @@ static ssize_t 
trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) } unsigned long __read_mostly tracing_thresh; + +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops; #ifdef LATENCY_FS_NOTIFY @@ -1748,18 +1751,14 @@ void latency_fsnotify(struct trace_array *tr) irq_work_queue(&tr->fsnotify_irqwork); } -#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER) +#else /* !LATENCY_FS_NOTIFY */ #define trace_create_maxlat_file(tr, d_tracer) \ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ d_tracer, &tr->max_latency, &tracing_max_lat_fops) -#else -#define trace_create_maxlat_file(tr, d_tracer) do { } while (0) #endif -#ifdef CONFIG_TRACER_MAX_TRACE /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, @@ -1834,14 +1833,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, ring_buffer_record_off(tr->max_buffer.buffer); #ifdef CONFIG_TRACER_SNAPSHOT - if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) - goto out_unlock; + if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { + arch_spin_unlock(&tr->max_lock); + return; + } #endif swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); - out_unlock: arch_spin_unlock(&tr->max_lock); } @@ -1888,6 +1888,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } + #endif /* CONFIG_TRACER_MAX_TRACE */ static int wait_on_pipe(struct trace_iterator *iter, int full) @@ -6572,7 +6573,7 @@ out: return ret; } -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, @@ -7587,7 +7588,7 @@ static const struct file_operations tracing_thresh_fops = { .llseek = 
generic_file_llseek, }; -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -9601,7 +9602,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) create_trace_options_dir(tr); +#ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); +#endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d42e24507152..8b69698780a1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -308,8 +308,7 @@ struct trace_array { struct array_buffer max_buffer; bool allocated_snapshot; #endif -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; @@ -688,12 +687,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -#endif /* CONFIG_TRACER_MAX_TRACE */ -#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY) +#ifdef CONFIG_FSNOTIFY #define LATENCY_FS_NOTIFY #endif +#endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef LATENCY_FS_NOTIFY void latency_fsnotify(struct trace_array *tr); From eac8938856950cc4cdccb94a7001b3aab32a27e9 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 11:46:35 +0800 Subject: [PATCH 093/207] tracing/hist: Fix wrong return value in parse_action_params() commit 2cc6a528882d0e0ccbc1bca5f95b8c963cedac54 upstream. When number of synth fields is more than SYNTH_FIELDS_MAX, parse_action_params() should return -EINVAL. 
Link: https://lore.kernel.org/linux-trace-kernel/20221207034635.2253990-1-zhengyejian1@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: c282a386a397 ("tracing: Add 'onmatch' hist trigger action support") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index b97412053c0a..c6e406995c11 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3588,6 +3588,7 @@ static int parse_action_params(struct trace_array *tr, char *params, while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0); + ret = -EINVAL; goto out; } From 5d067ad7e2e2f90627f3742bc5cb28792b0c8b66 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 22 Nov 2022 12:23:45 -0500 Subject: [PATCH 094/207] tracing/probes: Handle system names with hyphens commit 575b76cb885532aae13a9d979fd476bb2b156cb9 upstream. When creating probe names, a check is done to make sure it matches basic C standard variable naming standards. Basically, starts with alphabetic or underline, and then the rest of the characters have alpha-numeric or underline in them. But system names do not have any true naming conventions, as they are created by the TRACE_SYSTEM macro and nothing tests to see what they are. The "xhci-hcd" trace events has a '-' in the system name. When trying to attach a eprobe to one of these trace points, it fails because the system name does not follow the variable naming convention because of the hyphen, and the eprobe checks fail on this. Allow hyphens in the system name so that eprobes can attach to the "xhci-hcd" trace events. 
Link: https://lore.kernel.org/all/Y3eJ8GiGnEvVd8%2FN@macondo/ Link: https://lore.kernel.org/linux-trace-kernel/20221122122345.160f5077@gandalf.local.home Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 5b7a96220900e ("tracing/probe: Check event/group naming rule at parsing") Reported-by: Rafael Mendonca Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 19 ++++++++++++++++--- kernel/trace/trace_probe.c | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8b69698780a1..5581754d9762 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1954,17 +1954,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) } /* Check the name is good for event/group/fields */ -static inline bool is_good_name(const char *name) +static inline bool __is_good_name(const char *name, bool hash_ok) { - if (!isalpha(*name) && *name != '_') + if (!isalpha(*name) && *name != '_' && (!hash_ok || *name != '-')) return false; while (*++name != '\0') { - if (!isalpha(*name) && !isdigit(*name) && *name != '_') + if (!isalpha(*name) && !isdigit(*name) && *name != '_' && + (!hash_ok || *name != '-')) return false; } return true; } +/* Check the name is good for event/group/fields */ +static inline bool is_good_name(const char *name) +{ + return __is_good_name(name, false); +} + +/* Check the name is good for system */ +static inline bool is_good_system_name(const char *name) +{ + return __is_good_name(name, true); +} + /* Convert certain expected symbols into '_' when generating event names */ static inline void sanitize_event_name(char *name) { diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 36dff277de46..bb2f95d7175c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -246,7 +246,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, return -EINVAL; } strlcpy(buf, 
event, slash - event + 1); - if (!is_good_name(buf)) { + if (!is_good_system_name(buf)) { trace_probe_log_err(offset, BAD_GROUP_NAME); return -EINVAL; } From 2d31d84d4eb8a7c6fcf7952fa780bf918555b757 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 7 Dec 2022 17:15:57 +0800 Subject: [PATCH 095/207] tracing: Fix issue of missing one synthetic field commit ff4837f7fe59ff018eca4705a70eca5e0b486b97 upstream. The maximum number of synthetic fields supported is defined as SYNTH_FIELDS_MAX which value currently is 64, but it actually fails when try to generate a synthetic event with 64 fields by executing like: # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\ int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\ int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\ int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\ int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\ int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\ int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\ int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\ int v63; int v64" >> /sys/kernel/tracing/synthetic_events Correct the field counting to fix it. 
Link: https://lore.kernel.org/linux-trace-kernel/20221207091557.3137904-1-zhengyejian1@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: c9e759b1e845 ("tracing: Rework synthetic event command parsing") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index c3b582d19b62..67592eed0be8 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1282,12 +1282,12 @@ static int __create_synth_event(const char *name, const char *raw_fields) goto err_free_arg; } - fields[n_fields++] = field; if (n_fields == SYNTH_FIELDS_MAX) { synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); ret = -EINVAL; goto err_free_arg; } + fields[n_fields++] = field; n_fields_this_loop++; } From 26c57afef2b6ee9e3d7e3654f9fab277bbb0403b Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 29 Nov 2022 19:30:09 +0800 Subject: [PATCH 096/207] tracing: Fix infinite loop in tracing_read_pipe on overflowed print_trace_line commit c1ac03af6ed45d05786c219d102f37eb44880f28 upstream. print_trace_line may overflow seq_file buffer. If the event is not consumed, the while loop keeps peeking this event, causing a infinite loop. 
Link: https://lkml.kernel.org/r/20221129113009.182425-1-yangjihong1@huawei.com Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 088b1e427dbba ("ftrace: pipe fixes") Signed-off-by: Yang Jihong Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 459ff8cf768a..3076af8dbf32 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6797,7 +6797,20 @@ waitagain: ret = print_trace_line(iter); if (ret == TRACE_TYPE_PARTIAL_LINE) { - /* don't print partial lines */ + /* + * If one print_trace_line() fills entire trace_seq in one shot, + * trace_seq_to_user() will returns -EBUSY because save_len == 0, + * In this case, we need to consume it, otherwise, loop will peek + * this event next time, resulting in an infinite loop. + */ + if (save_len == 0) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + trace_consume(iter); + break; + } + + /* In other cases, don't print partial lines */ iter->seq.seq.len = save_len; break; } From 1fae2197adbebaa1ee95bfc316a0ac33c678c37f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 2 Nov 2022 12:01:01 +0100 Subject: [PATCH 097/207] staging: media: tegra-video: fix chan->mipi value on error commit 10b5ce6743c839fa75336042c64e2479caec9430 upstream. chan->mipi takes the return value of tegra_mipi_request() which can be a valid pointer or an error. However chan->mipi is checked in several places, including error-cleanup code in tegra_csi_channels_cleanup(), as 'if (chan->mipi)', which suggests the initial intent was that chan->mipi should be either NULL or a valid pointer, never an error. As a consequence, cleanup code in case of tegra_mipi_request() errors would dereference an invalid pointer. Fix by ensuring chan->mipi always contains either NULL or a valid pointer. Also add that to the documentation.
Fixes: 523c857e34ce ("media: tegra-video: Add CSI MIPI pads calibration") Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Signed-off-by: Luca Ceresoli Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/tegra-video/csi.c | 1 + drivers/staging/media/tegra-video/csi.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c index b26e44adb2be..6b59ef55c525 100644 --- a/drivers/staging/media/tegra-video/csi.c +++ b/drivers/staging/media/tegra-video/csi.c @@ -448,6 +448,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi, chan->mipi = tegra_mipi_request(csi->dev, node); if (IS_ERR(chan->mipi)) { ret = PTR_ERR(chan->mipi); + chan->mipi = NULL; dev_err(csi->dev, "failed to get mipi device: %d\n", ret); } diff --git a/drivers/staging/media/tegra-video/csi.h b/drivers/staging/media/tegra-video/csi.h index 4ee05a1785cf..6960ea2e3d36 100644 --- a/drivers/staging/media/tegra-video/csi.h +++ b/drivers/staging/media/tegra-video/csi.h @@ -56,7 +56,7 @@ struct tegra_csi; * @framerate: active framerate for TPG * @h_blank: horizontal blanking for TPG active format * @v_blank: vertical blanking for TPG active format - * @mipi: mipi device for corresponding csi channel pads + * @mipi: mipi device for corresponding csi channel pads, or NULL if not applicable (TPG, error) * @pixel_rate: active pixel rate from the sensor on this channel */ struct tegra_csi_channel { From 0fd003d3c708c80350a815eaf37b8e1114b976cf Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 2 Nov 2022 12:01:02 +0100 Subject: [PATCH 098/207] staging: media: tegra-video: fix device_node use after free commit c4d344163c3a7f90712525f931a6c016bbb35e18 upstream. 
At probe time this code path is followed: * tegra_csi_init * tegra_csi_channels_alloc * for_each_child_of_node(node, channel) -- iterates over channels * automatically gets 'channel' * tegra_csi_channel_alloc() * saves into chan->of_node a pointer to the channel OF node * automatically gets and puts 'channel' * now the node saved in chan->of_node has refcount 0, can disappear * tegra_csi_channels_init * iterates over channels * tegra_csi_channel_init -- uses chan->of_node After that, chan->of_node keeps storing the node until the device is removed. of_node_get() the node and of_node_put() it during teardown to avoid any risk. Fixes: 1ebaeb09830f ("media: tegra-video: Add support for external sensor capture") Cc: stable@vger.kernel.org Cc: Sowjanya Komatineni Signed-off-by: Luca Ceresoli Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/tegra-video/csi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c index 6b59ef55c525..426e653bd55d 100644 --- a/drivers/staging/media/tegra-video/csi.c +++ b/drivers/staging/media/tegra-video/csi.c @@ -433,7 +433,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi, for (i = 0; i < chan->numgangports; i++) chan->csi_port_nums[i] = port_num + i * CSI_PORTS_PER_BRICK; - chan->of_node = node; + chan->of_node = of_node_get(node); chan->numpads = num_pads; if (num_pads & 0x2) { chan->pads[0].flags = MEDIA_PAD_FL_SINK; @@ -641,6 +641,7 @@ static void tegra_csi_channels_cleanup(struct tegra_csi *csi) media_entity_cleanup(&subdev->entity); } + of_node_put(chan->of_node); list_del(&chan->list); kfree(chan); } From 2833221e0b0f7a7b3e82e6a68f0407f1808ffc44 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Fri, 11 Nov 2022 17:55:40 +0800 Subject: [PATCH 099/207] arm64: dts: mediatek: mt8195-demo: fix the memory size of node secmon commit e4a4175201014c0222f6bab1895a17b3d1b92f08 upstream. 
The size of device tree node secmon (bl31_secmon_reserved) was incorrect. It should be increased to 2MiB (0x200000). The origin setting will cause some abnormal behavior due to trusted-firmware-a and related firmware didn't load correctly. The incorrect behavior may vary because of different software stacks. For example, it will cause build error in some Yocto project because it will check if there was enough memory to load trusted-firmware-a to the reserved memory. When mt8195-demo.dts sent to the upstream, at that time the size of BL31 was small. Because supported functions and modules in BL31 are basic sets when the board was under early development stage. Now BL31 includes more firmwares of coprocessors and maturer functions so the size has grown bigger in real applications. According to the value reported by customers, we think reserved 2MiB for BL31 might be enough for maybe the following 2 or 3 years. Cc: stable@vger.kernel.org # v5.19 Fixes: 6147314aeedc ("arm64: dts: mediatek: Add device-tree for MT8195 Demo board") Signed-off-by: Macpaul Lin Reviewed-by: Miles Chen Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221111095540.28881-1-macpaul.lin@mediatek.com Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 4fbd99eb496a..dec85d254838 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -56,10 +56,10 @@ #size-cells = <2>; ranges; - /* 192 KiB reserved for ARM Trusted Firmware (BL31) */ + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ bl31_secmon_reserved: secmon@54600000 { no-map; - reg = <0 0x54600000 0x0 0x30000>; + reg = <0 0x54600000 0x0 0x200000>; }; /* 12 MiB reserved for OP-TEE (BL32) From 94d23f8b78400a7f04da6d77abc294dd56daaea0 
Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 11 Oct 2022 20:00:12 +0100 Subject: [PATCH 100/207] ARM: 9256/1: NWFPE: avoid compiler-generated __aeabi_uldivmod commit 3220022038b9a3845eea762af85f1c5694b9f861 upstream. clang-15's ability to elide loops completely became more aggressive when it can deduce how a variable is being updated in a loop. Counting down one variable by an increment of another can be replaced by a modulo operation. For 64b variables on 32b ARM EABI targets, this can result in the compiler generating calls to __aeabi_uldivmod, which it does for a do while loop in float64_rem(). For the kernel, we'd generally prefer that developers not open code 64b division via binary / operators and instead use the more explicit helpers from div64.h. On arm-linux-gnuabi targets, failure to do so can result in linkage failures due to undefined references to __aeabi_uldivmod(). While developers can avoid open coding divisions on 64b variables, the compiler doesn't know that the Linux kernel has a partial implementation of a compiler runtime (--rtlib) to enforce this convention. It's also undecidable for the compiler whether the code in question would be faster to execute the loop vs elide it and do the 64b division. While I actively avoid using the internal -mllvm command line flags, I think we get better code than using barrier() here, which will force reloads+spills in the loop for all toolchains. 
Link: https://github.com/ClangBuiltLinux/linux/issues/1666 Reported-by: Nathan Chancellor Reviewed-by: Arnd Bergmann Signed-off-by: Nick Desaulniers Tested-by: Nathan Chancellor Cc: stable@vger.kernel.org Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/nwfpe/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile index 303400fa2cdf..2aec85ab1e8b 100644 --- a/arch/arm/nwfpe/Makefile +++ b/arch/arm/nwfpe/Makefile @@ -11,3 +11,9 @@ nwfpe-y += fpa11.o fpa11_cpdo.o fpa11_cpdt.o \ entry.o nwfpe-$(CONFIG_FPE_NWFPE_XP) += extended_cpdo.o + +# Try really hard to avoid generating calls to __aeabi_uldivmod() from +# float64_rem() due to loop elision. +ifdef CONFIG_CC_IS_CLANG +CFLAGS_softfloat.o += -mllvm -replexitval=never +endif From 123eddf92a114e03919942641d2c2b1f4ca56ea6 Mon Sep 17 00:00:00 2001 From: Keita Suzuki Date: Tue, 26 Apr 2022 06:29:19 +0100 Subject: [PATCH 101/207] media: dvb-core: Fix double free in dvb_register_device() commit 6b0d0477fce747d4137aa65856318b55fba72198 upstream. In function dvb_register_device() -> dvb_register_media_device() -> dvb_create_media_entity(), dvb->entity is allocated and initialized. If the initialization fails, it frees the dvb->entity, and return an error code. The caller takes the error code and handles the error by calling dvb_media_device_free(), which unregisters the entity and frees the field again if it is not NULL. As dvb->entity may not NULLed in dvb_create_media_entity() when the allocation of dvbdev->pad fails, a double free may occur. This may also cause an Use After free in media_device_unregister_entity(). Fix this by storing NULL to dvb->entity when it is freed. 
Link: https://lore.kernel.org/linux-media/20220426052921.2088416-1-keitasuzuki.park@sslab.ics.keio.ac.jp Fixes: fcd5ce4b3936 ("media: dvb-core: fix a memory leak bug") Cc: stable@vger.kernel.org Cc: Wenwen Wang Signed-off-by: Keita Suzuki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-core/dvbdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 9934728734af..a31d52cb6d62 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -335,6 +335,7 @@ static int dvb_create_media_entity(struct dvb_device *dvbdev, GFP_KERNEL); if (!dvbdev->pads) { kfree(dvbdev->entity); + dvbdev->entity = NULL; return -ENOMEM; } } From 530ca64b44625f7d39eb1d5efb6f9ff21da991e2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 31 Oct 2022 11:02:45 +0100 Subject: [PATCH 102/207] media: dvb-core: Fix UAF due to refcount races at releasing commit fd3d91ab1c6ab0628fe642dd570b56302c30a792 upstream. The dvb-core tries to sync the releases of opened files at dvb_dmxdev_release() with two refcounts: dvbdev->users and dvr_dvbdev->users. A problem is present in those two syncs: when yet another dvb_demux_open() is called during those sync waits, dvb_demux_open() continues to process even if the device is being closed. This includes the increment of the former refcount, resulting in the leftover refcount after the sync of the latter refcount at dvb_dmxdev_release(). It ends up with use-after-free, since the function believes that all usages were gone and releases the resources. This patch addresses the problem by adding the check of dmxdev->exit flag at dvb_demux_open(), just like dvb_dvr_open() already does. With the exit flag check, the second call of dvb_demux_open() fails, hence the further corruption can be avoided. 
Also for avoiding the races of the dmxdev->exit flag reference, this patch serializes the dmxdev->exit set up and the sync waits with the dmxdev->mutex lock at dvb_dmxdev_release(). Without the mutex lock, dvb_demux_open() (or dvb_dvr_open()) may run concurrently with dvb_dmxdev_release(), which allows to skip the exit flag check and continue the open process that is being closed. CVE-2022-41218 is assigned to those bugs above. Reported-by: Hyunwoo Kim Cc: Link: https://lore.kernel.org/20220908132754.30532-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-core/dmxdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index f6ee678107d3..9ce5f010de3f 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -790,6 +790,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file) if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; + if (dmxdev->exit) { + mutex_unlock(&dmxdev->mutex); + return -ENODEV; + } + for (i = 0; i < dmxdev->filternum; i++) if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) break; @@ -1448,7 +1453,10 @@ EXPORT_SYMBOL(dvb_dmxdev_init); void dvb_dmxdev_release(struct dmxdev *dmxdev) { + mutex_lock(&dmxdev->mutex); dmxdev->exit = 1; + mutex_unlock(&dmxdev->mutex); + if (dmxdev->dvbdev->users > 1) { wait_event(dmxdev->dvbdev->wait_queue, dmxdev->dvbdev->users == 1); From e9f7a3bbaa5c0bc1c9dab5bf3ea5f2802034e50b Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 16 Dec 2022 22:03:41 -0300 Subject: [PATCH 103/207] cifs: fix confusing debug message commit a85ceafd41927e41a4103d228a993df7edd8823b upstream. 
Since rc was initialised to -ENOMEM in cifs_get_smb_ses(), when an existing smb session was found, free_xid() would be called and then print CIFS: fs/cifs/connect.c: Existing tcp session with server found CIFS: fs/cifs/connect.c: VFS: in cifs_get_smb_ses as Xid: 44 with uid: 0 CIFS: fs/cifs/connect.c: Existing smb sess found (status=1) CIFS: fs/cifs/connect.c: VFS: leaving cifs_get_smb_ses (xid = 44) rc = -12 Fix this by initialising rc to 0 and then let free_xid() print this instead CIFS: fs/cifs/connect.c: Existing tcp session with server found CIFS: fs/cifs/connect.c: VFS: in cifs_get_smb_ses as Xid: 14 with uid: 0 CIFS: fs/cifs/connect.c: Existing smb sess found (status=1) CIFS: fs/cifs/connect.c: VFS: leaving cifs_get_smb_ses (xid = 14) rc = 0 Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9db9527c61cf..ede655d62d74 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2157,7 +2157,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { - int rc = -ENOMEM; + int rc = 0; unsigned int xid; struct cifs_ses *ses; struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; @@ -2206,6 +2206,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) return ses; } + rc = -ENOMEM; + cifs_dbg(FYI, "Existing smb sess not found\n"); ses = sesInfoAlloc(); if (ses == NULL) From 66d65a6fba330ac8152b4f4d5f897c63676ba675 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 11 Dec 2022 13:54:21 -0600 Subject: [PATCH 104/207] cifs: fix missing display of three mount options commit 2bfd81043e944af0e52835ef6d9b41795af22341 upstream. 
Three mount options, "tcpnodelay", "noautotune" and "noblocksend", were not displayed when passed in on cifs/smb3 mounts (e.g. in /proc/mounts). No change to defaults so these are not displayed if not specified on mount. Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 712a43161448..6094cb2ff099 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -678,9 +678,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",echo_interval=%lu", tcon->ses->server->echo_interval / HZ); - /* Only display max_credits if it was overridden on mount */ + /* Only display the following if overridden on mount */ if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE) seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits); + if (tcon->ses->server->tcp_nodelay) + seq_puts(s, ",tcpnodelay"); + if (tcon->ses->server->noautotune) + seq_puts(s, ",noautotune"); + if (tcon->ses->server->noblocksnd) + seq_puts(s, ",noblocksend"); if (tcon->snapshot_time) seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); From 2d8cbc14707a79121fa019f8b570e181075f0d56 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 13 Dec 2022 09:15:23 -0300 Subject: [PATCH 105/207] cifs: set correct tcon status after initial tree connect commit b248586a49a7729f73c504b1e7b958caea45e927 upstream. cifs_tcon::status wasn't correctly updated to TID_GOOD after initial tree connect thus staying at TID_NEW as long as it was connected. 
Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ede655d62d74..191fc133f93a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2602,6 +2602,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); + tcon->status = TID_GOOD; /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, From a37d718a0e0b1d3417c3d0098399266cb6d02ef8 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 16 Dec 2022 14:00:19 -0300 Subject: [PATCH 106/207] cifs: set correct ipc status after initial tree connect commit 86fe0fa8747fb1bc4cc44fc1966e0959fe752f38 upstream. cifs_tcon::status wasn't correctly updated to TID_GOOD after establishing initial IPC connection thus staying at TID_NEW as long as it wasn't reconnected. 
Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 191fc133f93a..0d0e1735aa00 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1871,6 +1871,9 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) cifs_dbg(FYI, "IPC tcon rc=%d ipc tid=0x%x\n", rc, tcon->tid); + spin_lock(&tcon->tc_lock); + tcon->status = TID_GOOD; + spin_unlock(&tcon->tc_lock); ses->tcon_ipc = tcon; out: return rc; @@ -2280,10 +2283,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - free_xid(xid); - cifs_setup_ipc(ses, ctx); + free_xid(xid); + return ses; get_ses_fail: From f22532d6bbafc6ef3599b1f71b81e056406d5c51 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 17 Dec 2022 21:04:14 -0300 Subject: [PATCH 107/207] cifs: set correct status of tcon ipc when reconnecting commit 25cf01b7c9200d6ace5a59125d8166435dd9dea7 upstream. The status of tcon ipcs were not being set to TID_NEED_RECO when marking sessions and tcons to be reconnected, therefore not sending tree connect to those ipcs in cifs_tree_connect() and leaving them disconnected. 
Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0d0e1735aa00..7e7f712f97fd 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -279,8 +279,10 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, tcon->need_reconnect = true; tcon->status = TID_NEED_RECON; } - if (ses->tcon_ipc) + if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; + ses->tcon_ipc->status = TID_NEED_RECON; + } next_session: spin_unlock(&ses->chan_lock); From 135b9ed45347ef22fd05e15a891d23c9b33d72f9 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 14 Dec 2022 10:51:18 +0000 Subject: [PATCH 108/207] ravb: Fix "failed to switch device to config mode" message during unbind commit c72a7e42592b2e18d862cf120876070947000d7a upstream. This patch fixes the error "ravb 11c20000.ethernet eth0: failed to switch device to config mode" during unbind. We are doing register access after pm_runtime_put_sync(). We usually do cleanup in reverse order of init. Currently in remove(), the "pm_runtime_put_sync" is not in reverse order. Probe reset_control_deassert(rstc); pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); remove pm_runtime_put_sync(&pdev->dev); unregister_netdev(ndev); .. 
ravb_mdio_release(priv); pm_runtime_disable(&pdev->dev); Consider the call to unregister_netdev() unregister_netdev->unregister_netdevice_queue->rollback_registered_many that calls the below functions which access the registers after pm_runtime_put_sync() 1) ravb_get_stats 2) ravb_close Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Cc: stable@vger.kernel.org Signed-off-by: Biju Das Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221214105118.2495313-1-biju.das.jz@bp.renesas.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 33f723a9f471..b4e0fc7f65bd 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2903,12 +2903,12 @@ static int ravb_remove(struct platform_device *pdev) priv->desc_bat_dma); /* Set reset mode */ ravb_write(ndev, CCC_OPC_RESET, CCC); - pm_runtime_put_sync(&pdev->dev); unregister_netdev(ndev); if (info->nc_queues) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); ravb_mdio_release(priv); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(priv->rstc); free_netdev(ndev); From 08c83264e6906615a7792f24de6e5785651d62ac Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 27 Oct 2022 17:32:49 +0100 Subject: [PATCH 109/207] rtc: ds1347: fix value written to century register commit 4dfe05bdc1ade79b943d4979a2e2a8b5ef68fbb5 upstream. In `ds1347_set_time()`, the wrong value is being written to the `DS1347_CENTURY_REG` register. It needs to be converted to BCD. Fix it. 
Fixes: 147dae76dbb9 ("rtc: ds1347: handle century register") Cc: # v5.5+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20221027163249.447416-1-abbotti@mev.co.uk Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-ds1347.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c index 157bf5209ac4..a40c1a52df65 100644 --- a/drivers/rtc/rtc-ds1347.c +++ b/drivers/rtc/rtc-ds1347.c @@ -112,7 +112,7 @@ static int ds1347_set_time(struct device *dev, struct rtc_time *dt) return err; century = (dt->tm_year / 100) + 19; - err = regmap_write(map, DS1347_CENTURY_REG, century); + err = regmap_write(map, DS1347_CENTURY_REG, bin2bcd(century)); if (err) return err; From 51c107f91bf1d49fdd1538e46770e6f146f40267 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 5 Dec 2022 21:16:26 +0800 Subject: [PATCH 110/207] drm/amdgpu: fix mmhub register base coding error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 347fafe0eb46df941965c355c77ce480e4d49f1f upstream. fix MMHUB register base coding error. 
Fixes: ec6837591f992 ("drm/amdgpu/gmc10: program the smallK fragment size") Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 998b5d17b271..0e664d0cc8d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -319,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) tmp = mmMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1b027d069ab4..4638ea7c2eec 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev) tmp = mmMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); } static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index a1d26c4d80b8..16cc82215e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -275,7 +275,7 @@ static void 
mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index e8058edc1d10..6bdf2ef0298d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -269,7 +269,7 @@ static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 770be0a8f7ce..45465acaa943 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -268,7 +268,7 @@ static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev) tmp = regMMVM_L2_CNTL5_DEFAULT; tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); - WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); } static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev) From b0544dd860b3c9e55abd37d418334463d7375436 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 24 Nov 2022 11:12:07 +0900 Subject: [PATCH 111/207] block: mq-deadline: Fix dd_finish_request() for zoned devices commit 2820e5d0820ac4daedff1272616a53d9c7682fd2 upstream. 
dd_finish_request() tests if the per prio fifo_list is not empty to determine if request dispatching must be restarted for handling blocked write requests to zoned devices with a call to blk_mq_sched_mark_restart_hctx(). While simple, this implementation has 2 problems: 1) Only the priority level of the completed request is considered. However, writes to a zone may be blocked due to other writes to the same zone using a different priority level. While this is unlikely to happen in practice, as writing a zone with different IO priorities does not make sense, nothing in the code prevents this from happening. 2) The use of list_empty() is dangerous as dd_finish_request() does not take dd->lock and may run concurrently with the insert and dispatch code. Fix these 2 problems by testing the write fifo list of all priority levels using the new helper dd_has_write_work(), and by testing each fifo list using list_empty_careful(). Fixes: c807ab520fc3 ("block/mq-deadline: Add I/O priority support") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20221124021208.242541-2-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/mq-deadline.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 5639921dfa92..36374481cb87 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -789,6 +789,18 @@ static void dd_prepare_request(struct request *rq) rq->elv.priv[0] = NULL; } +static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + enum dd_prio p; + + for (p = 0; p <= DD_PRIO_MAX; p++) + if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE])) + return true; + + return false; +} + /* * Callback from inside blk_mq_free_request(). 
* @@ -828,9 +840,10 @@ static void dd_finish_request(struct request *rq) spin_lock_irqsave(&dd->zone_lock, flags); blk_req_zone_write_unlock(rq); - if (!list_empty(&per_prio->fifo_list[DD_WRITE])) - blk_mq_sched_mark_restart_hctx(rq->mq_hctx); spin_unlock_irqrestore(&dd->zone_lock, flags); + + if (dd_has_write_work(rq->mq_hctx)) + blk_mq_sched_mark_restart_hctx(rq->mq_hctx); } } From ab87d6f26e40bef8792ec6c2aca61b0cd476ddd7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 24 Nov 2022 11:12:08 +0900 Subject: [PATCH 112/207] block: mq-deadline: Do not break sequential write streams to zoned HDDs commit 015d02f48537cf2d1a65eeac50717566f9db6eec upstream. mq-deadline ensures an in order dispatching of write requests to zoned block devices using a per zone lock (a bit). This implies that for any purely sequential write workload, the drive is exercised most of the time at a maximum queue depth of one. However, when such sequential write workload crosses a zone boundary (when sequentially writing multiple contiguous zones), zone write locking may prevent the last write to one zone from being issued (as the previous write is still being executed) but allow the first write to the following zone to be issued (as that zone is not yet being written and not locked). This results in an out of order delivery of the sequential write commands to the device every time a zone boundary is crossed. While such behavior does not break the sequential write constraint of zoned block devices (and does not generate any write error), some zoned hard-disks react badly to seeing these out of order writes, resulting in lower write throughput. This problem can be addressed by always dispatching the first request of a stream of sequential write requests, regardless of the zones targeted by these sequential writes. 
To do so, the function deadline_skip_seq_writes() is introduced and used in deadline_next_request() to select the next write command to issue if the target device is an HDD (blk_queue_nonrot() being false). deadline_fifo_request() is modified using the new deadline_earlier_request() and deadline_is_seq_writes() helpers to ignore requests in the fifo list that have a preceding request in lba order that is sequential. With this fix, a sequential write workload executed with the following fio command: fio --name=seq-write --filename=/dev/sda --zonemode=zbd --direct=1 \ --size=68719476736 --ioengine=libaio --iodepth=32 --rw=write \ --bs=65536 results in an increase from 225 MB/s to 250 MB/s of the write throughput of an SMR HDD (11% increase). Cc: Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20221124021208.242541-3-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/mq-deadline.c | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 36374481cb87..6672f1bce379 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -130,6 +130,20 @@ static u8 dd_rq_ioclass(struct request *rq) return IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); } +/* + * get the request before `rq' in sector-sorted order + */ +static inline struct request * +deadline_earlier_request(struct request *rq) +{ + struct rb_node *node = rb_prev(&rq->rb_node); + + if (node) + return rb_entry_rq(node); + + return NULL; +} + /* * get the request after `rq' in sector-sorted order */ @@ -277,6 +291,39 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio, return 0; } +/* + * Check if rq has a sequential request preceding it. 
+ */ +static bool deadline_is_seq_writes(struct deadline_data *dd, struct request *rq) +{ + struct request *prev = deadline_earlier_request(rq); + + if (!prev) + return false; + + return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq); +} + +/* + * Skip all write requests that are sequential from @rq, even if we cross + * a zone boundary. + */ +static struct request *deadline_skip_seq_writes(struct deadline_data *dd, + struct request *rq) +{ + sector_t pos = blk_rq_pos(rq); + sector_t skipped_sectors = 0; + + while (rq) { + if (blk_rq_pos(rq) != pos + skipped_sectors) + break; + skipped_sectors += blk_rq_sectors(rq); + rq = deadline_latter_request(rq); + } + + return rq; +} + /* * For the specified data direction, return the next request to * dispatch using arrival ordered lists. @@ -297,11 +344,16 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, /* * Look for a write request that can be dispatched, that is one with - * an unlocked target zone. + * an unlocked target zone. For some HDDs, breaking a sequential + * write stream can lead to lower throughput, so make sure to preserve + * sequential write streams, even if that stream crosses into the next + * zones and these zones are unlocked. */ spin_lock_irqsave(&dd->zone_lock, flags); list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) { - if (blk_req_can_dispatch_to_zone(rq)) + if (blk_req_can_dispatch_to_zone(rq) && + (blk_queue_nonrot(rq->q) || + !deadline_is_seq_writes(dd, rq))) goto out; } rq = NULL; @@ -331,13 +383,19 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, /* * Look for a write request that can be dispatched, that is one with - * an unlocked target zone. + * an unlocked target zone. For some HDDs, breaking a sequential + * write stream can lead to lower throughput, so make sure to preserve + * sequential write streams, even if that stream crosses into the next + * zones and these zones are unlocked. 
*/ spin_lock_irqsave(&dd->zone_lock, flags); while (rq) { if (blk_req_can_dispatch_to_zone(rq)) break; - rq = deadline_latter_request(rq); + if (blk_queue_nonrot(rq->q)) + rq = deadline_latter_request(rq); + else + rq = deadline_skip_seq_writes(dd, rq); } spin_unlock_irqrestore(&dd->zone_lock, flags); From aa4c785566effdd84760522f8b2e140888a61759 Mon Sep 17 00:00:00 2001 From: Florian-Ewald Mueller Date: Tue, 25 Oct 2022 09:37:05 +0200 Subject: [PATCH 113/207] md/bitmap: Fix bitmap chunk size overflow issues commit 4555211190798b6b6fa2c37667d175bf67945c78 upstream. - limit bitmap chunk size internal u64 variable to values not overflowing the u32 bitmap superblock structure variable stored on persistent media - assign bitmap chunk size internal u64 variable from unsigned values to avoid possible sign extension artifacts when assigning from a s32 value The bug has been there since at least kernel 4.0. Steps to reproduce it: 1: mdadm -C /dev/mdx -l 1 --bitmap=internal --bitmap-chunk=256M -e 1.2 -n2 /dev/rnbd1 /dev/rnbd2 2 resize member device rnbd1 and rnbd2 to 8 TB 3 mdadm --grow /dev/mdx --size=max The bitmap_chunksize will overflow without patch. 
Cc: stable@vger.kernel.org Signed-off-by: Florian-Ewald Mueller Signed-off-by: Jack Wang Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-bitmap.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 63ece30114e5..e7cc6ba1b657 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap) sb = kmap_atomic(bitmap->storage.sb_page); pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); - pr_debug(" version: %d\n", le32_to_cpu(sb->version)); + pr_debug(" version: %u\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", le32_to_cpu(*(__le32 *)(sb->uuid+0)), le32_to_cpu(*(__le32 *)(sb->uuid+4)), @@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap) pr_debug("events cleared: %llu\n", (unsigned long long) le64_to_cpu(sb->events_cleared)); pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); - pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); - pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); + pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); + pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); pr_debug(" sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); - pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); + pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); kunmap_atomic(sb); } @@ -2105,7 +2105,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bytes = DIV_ROUND_UP(chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); - } while (bytes > (space << 9)); + } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < + (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); } else chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; @@ 
-2150,7 +2151,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = pages; bitmap->counts.chunkshift = chunkshift; bitmap->counts.chunks = chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + + bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + BITMAP_BLOCK_SHIFT); blocks = min(old_counts.chunks << old_counts.chunkshift, @@ -2176,8 +2177,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = old_counts.pages; bitmap->counts.chunkshift = old_counts.chunkshift; bitmap->counts.chunks = old_counts.chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + - BITMAP_BLOCK_SHIFT); + bitmap->mddev->bitmap_info.chunksize = + 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); blocks = old_counts.chunks << old_counts.chunkshift; pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); break; @@ -2537,6 +2538,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len) if (csize < 512 || !is_power_of_2(csize)) return -EINVAL; + if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * + sizeof(((bitmap_super_t *)0)->chunksize)))) + return -EOVERFLOW; mddev->bitmap_info.chunksize = csize; return len; } From 19d5b47b0e09e61d0bc213d85c75f1e03172e72a Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Thu, 27 Oct 2022 10:01:43 +0000 Subject: [PATCH 114/207] efi: Add iMac Pro 2017 to uefi skip cert quirk commit 0be56a116220f9e5731a6609e66a11accfe8d8e2 upstream. The iMac Pro 2017 is also a T2 Mac. Thus add it to the list of uefi skip cert. 
Cc: stable@vger.kernel.org Fixes: 155ca952c7ca ("efi: Do not import certificates from UEFI Secure Boot for T2 Macs") Link: https://lore.kernel.org/linux-integrity/9D46D92F-1381-4F10-989C-1A12CD2FFDD8@live.com/ Signed-off-by: Aditya Garg Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/platform_certs/load_uefi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index b78753d27d8e..d1fdd113450a 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -35,6 +35,7 @@ static const struct dmi_system_id uefi_skip_cert[] = { { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMacPro1,1") }, { } }; From 360ef3bcf73054d9e84a263f7e9e87618cdb5aa0 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 27 Oct 2022 19:12:21 +0200 Subject: [PATCH 115/207] wifi: wilc1000: sdio: fix module autoloading commit 57d545b5a3d6ce3a8fb6b093f02bfcbb908973f3 upstream. There are no SDIO module aliases included in the driver, therefore, module autoloading isn't working. Add the proper MODULE_DEVICE_TABLE(). 
Cc: stable@vger.kernel.org Signed-off-by: Michael Walle Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221027171221.491937-1-michael@walle.cc Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c index 7390f94cd4ca..a05bda7b9a3b 100644 --- a/drivers/net/wireless/microchip/wilc1000/sdio.c +++ b/drivers/net/wireless/microchip/wilc1000/sdio.c @@ -20,6 +20,7 @@ static const struct sdio_device_id wilc_sdio_ids[] = { { SDIO_DEVICE(SDIO_VENDOR_ID_MICROCHIP_WILC, SDIO_DEVICE_ID_MICROCHIP_WILC1000) }, { }, }; +MODULE_DEVICE_TABLE(sdio, wilc_sdio_ids); #define WILC_SDIO_BLOCK_SIZE 512 From 9526c179b9cbd54aaeaa54548c9e36655ad6100b Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Sun, 23 Oct 2022 15:33:20 +0100 Subject: [PATCH 116/207] ASoC: jz4740-i2s: Handle independent FIFO flush bits commit 8b3a9ad86239f80ed569e23c3954a311f66481d6 upstream. On the JZ4740, there is a single bit that flushes (empties) both the transmit and receive FIFO. Later SoCs have independent flush bits for each FIFO. Independent FIFOs can be flushed before the snd_soc_dai_active() check because it won't disturb other active streams. This ensures that the FIFO we're about to use is always flushed before starting up. With shared FIFOs we can't do that because if another substream is active, flushing its FIFO would cause underrun errors. This also fixes a bug: since we were only setting the JZ4740's flush bit, which corresponds to the TX FIFO flush bit on other SoCs, other SoCs were not having their RX FIFO flushed at all. 
Fixes: 967beb2e8777 ("ASoC: jz4740: Add jz4780 support") Reviewed-by: Paul Cercueil Cc: stable@vger.kernel.org Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20221023143328.160866-2-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/jz4740/jz4740-i2s.c | 39 ++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c index c4c1e89b47c1..83cb81999c6f 100644 --- a/sound/soc/jz4740/jz4740-i2s.c +++ b/sound/soc/jz4740/jz4740-i2s.c @@ -55,7 +55,8 @@ #define JZ_AIC_CTRL_MONO_TO_STEREO BIT(11) #define JZ_AIC_CTRL_SWITCH_ENDIANNESS BIT(10) #define JZ_AIC_CTRL_SIGNED_TO_UNSIGNED BIT(9) -#define JZ_AIC_CTRL_FLUSH BIT(8) +#define JZ_AIC_CTRL_TFLUSH BIT(8) +#define JZ_AIC_CTRL_RFLUSH BIT(7) #define JZ_AIC_CTRL_ENABLE_ROR_INT BIT(6) #define JZ_AIC_CTRL_ENABLE_TUR_INT BIT(5) #define JZ_AIC_CTRL_ENABLE_RFS_INT BIT(4) @@ -90,6 +91,8 @@ enum jz47xx_i2s_version { struct i2s_soc_info { enum jz47xx_i2s_version version; struct snd_soc_dai_driver *dai; + + bool shared_fifo_flush; }; struct jz4740_i2s { @@ -116,19 +119,44 @@ static inline void jz4740_i2s_write(const struct jz4740_i2s *i2s, writel(value, i2s->base + reg); } +static inline void jz4740_i2s_set_bits(const struct jz4740_i2s *i2s, + unsigned int reg, uint32_t bits) +{ + uint32_t value = jz4740_i2s_read(i2s, reg); + value |= bits; + jz4740_i2s_write(i2s, reg, value); +} + static int jz4740_i2s_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai); - uint32_t conf, ctrl; + uint32_t conf; int ret; + /* + * When we can flush FIFOs independently, only flush the FIFO + * that is starting up. We can do this when the DAI is active + * because it does not disturb other active substreams. 
+ */ + if (!i2s->soc_info->shared_fifo_flush) { + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_TFLUSH); + else + jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_RFLUSH); + } + if (snd_soc_dai_active(dai)) return 0; - ctrl = jz4740_i2s_read(i2s, JZ_REG_AIC_CTRL); - ctrl |= JZ_AIC_CTRL_FLUSH; - jz4740_i2s_write(i2s, JZ_REG_AIC_CTRL, ctrl); + /* + * When there is a shared flush bit for both FIFOs, the TFLUSH + * bit flushes both FIFOs. Flushing while the DAI is active would + * cause FIFO underruns in other active substreams so we have to + * guard this behind the snd_soc_dai_active() check. + */ + if (i2s->soc_info->shared_fifo_flush) + jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_TFLUSH); ret = clk_prepare_enable(i2s->clk_i2s); if (ret) @@ -443,6 +471,7 @@ static struct snd_soc_dai_driver jz4740_i2s_dai = { static const struct i2s_soc_info jz4740_i2s_soc_info = { .version = JZ_I2S_JZ4740, .dai = &jz4740_i2s_dai, + .shared_fifo_flush = true, }; static const struct i2s_soc_info jz4760_i2s_soc_info = { From 5038ee677606106c91564f9c4557d808d14bad70 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 8 Sep 2022 00:44:09 +0200 Subject: [PATCH 117/207] ipu3-imgu: Fix NULL pointer dereference in imgu_subdev_set_selection() commit dc608edf7d45ba0c2ad14c06eccd66474fec7847 upstream. Calling v4l2_subdev_get_try_crop() and v4l2_subdev_get_try_compose() with a subdev state of NULL leads to a NULL pointer dereference. This can currently happen in imgu_subdev_set_selection() when the state passed in is NULL, as this method first gets pointers to both the "try" and "active" states and only then decides which to use. The same issue has been addressed for imgu_subdev_get_selection() with commit 30d03a0de650 ("ipu3-imgu: Fix NULL pointer dereference in active selection access"). However the issue still persists in imgu_subdev_set_selection(). 
Therefore, apply a similar fix as done in the aforementioned commit to imgu_subdev_set_selection(). To keep things a bit cleaner, introduce helper functions for "crop" and "compose" access and use them in both imgu_subdev_set_selection() and imgu_subdev_get_selection(). Fixes: 0d346d2a6f54 ("media: v4l2-subdev: add subdev-wide state struct") Cc: stable@vger.kernel.org # for v5.14 and later Signed-off-by: Maximilian Luz Signed-off-by: Sakari Ailus Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/ipu3/ipu3-v4l2.c | 57 +++++++++++++++----------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index ce13e746c15f..e530767e80a5 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -188,6 +188,28 @@ static int imgu_subdev_set_fmt(struct v4l2_subdev *sd, return 0; } +static struct v4l2_rect * +imgu_subdev_get_crop(struct imgu_v4l2_subdev *sd, + struct v4l2_subdev_state *sd_state, unsigned int pad, + enum v4l2_subdev_format_whence which) +{ + if (which == V4L2_SUBDEV_FORMAT_TRY) + return v4l2_subdev_get_try_crop(&sd->subdev, sd_state, pad); + else + return &sd->rect.eff; +} + +static struct v4l2_rect * +imgu_subdev_get_compose(struct imgu_v4l2_subdev *sd, + struct v4l2_subdev_state *sd_state, unsigned int pad, + enum v4l2_subdev_format_whence which) +{ + if (which == V4L2_SUBDEV_FORMAT_TRY) + return v4l2_subdev_get_try_compose(&sd->subdev, sd_state, pad); + else + return &sd->rect.bds; +} + static int imgu_subdev_get_selection(struct v4l2_subdev *sd, struct v4l2_subdev_state *sd_state, struct v4l2_subdev_selection *sel) @@ -200,18 +222,12 @@ static int imgu_subdev_get_selection(struct v4l2_subdev *sd, switch (sel->target) { case V4L2_SEL_TGT_CROP: - if (sel->which == V4L2_SUBDEV_FORMAT_TRY) - sel->r = *v4l2_subdev_get_try_crop(sd, sd_state, - sel->pad); - else - sel->r = imgu_sd->rect.eff; + sel->r = 
*imgu_subdev_get_crop(imgu_sd, sd_state, sel->pad, + sel->which); return 0; case V4L2_SEL_TGT_COMPOSE: - if (sel->which == V4L2_SUBDEV_FORMAT_TRY) - sel->r = *v4l2_subdev_get_try_compose(sd, sd_state, - sel->pad); - else - sel->r = imgu_sd->rect.bds; + sel->r = *imgu_subdev_get_compose(imgu_sd, sd_state, sel->pad, + sel->which); return 0; default: return -EINVAL; @@ -223,10 +239,9 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_selection *sel) { struct imgu_device *imgu = v4l2_get_subdevdata(sd); - struct imgu_v4l2_subdev *imgu_sd = container_of(sd, - struct imgu_v4l2_subdev, - subdev); - struct v4l2_rect *rect, *try_sel; + struct imgu_v4l2_subdev *imgu_sd = + container_of(sd, struct imgu_v4l2_subdev, subdev); + struct v4l2_rect *rect; dev_dbg(&imgu->pci_dev->dev, "set subdev %u sel which %u target 0x%4x rect [%ux%u]", @@ -238,22 +253,18 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd, switch (sel->target) { case V4L2_SEL_TGT_CROP: - try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad); - rect = &imgu_sd->rect.eff; + rect = imgu_subdev_get_crop(imgu_sd, sd_state, sel->pad, + sel->which); break; case V4L2_SEL_TGT_COMPOSE: - try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad); - rect = &imgu_sd->rect.bds; + rect = imgu_subdev_get_compose(imgu_sd, sd_state, sel->pad, + sel->which); break; default: return -EINVAL; } - if (sel->which == V4L2_SUBDEV_FORMAT_TRY) - *try_sel = sel->r; - else - *rect = sel->r; - + *rect = sel->r; return 0; } From 9c476590feffaf16bfaed8e826afdf7b0f3d6d6e Mon Sep 17 00:00:00 2001 From: Zhang Yuchen Date: Fri, 7 Oct 2022 17:26:16 +0800 Subject: [PATCH 118/207] ipmi: fix long wait in unload when IPMI disconnect commit f6f1234d98cce69578bfac79df147a1f6660596c upstream. When fixing the problem mentioned in PATCH1, we also found the following problem: If the IPMI is disconnected and in the sending process, the uninstallation driver will be stuck for a long time. 
The main problem is that uninstalling the driver waits for curr_msg to be sent or HOSED. After stopping the tasklet, the only place to trigger the timeout mechanism is the circular poll in shutdown_smi. The poll function delays 10us and calls smi_event_handler(smi_info,10). smi_event_handler() deducts 10us from kcs->ibf_timeout. But the poll func is followed by schedule_timeout_uninterruptible(1). The time consumed here is not counted in kcs->ibf_timeout. So when 10us is deducted from kcs->ibf_timeout, at least 1 jiffy has actually passed. The waiting time has increased by more than a hundredfold. Now, instead of calling poll(), call smi_event_handler() directly and calculate the elapsed time. For verification, you can directly use ebpf to check the kcs->ibf_timeout for each call to kcs_event() when IPMI is disconnected. It decrements at the normal rate before unloading. The decrement rate becomes very slow after unloading. $ bpftrace -e 'kprobe:kcs_event {printf("kcs->ibftimeout : %d\n", *(arg0+584));}' Signed-off-by: Zhang Yuchen Message-Id: <20221007092617.87597-3-zhangyuchen.lcr@bytedance.com> Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 6e357ad76f2e..abddd7e43a9a 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2153,6 +2153,20 @@ skip_fallback_noirq: } module_init(init_ipmi_si); +static void wait_msg_processed(struct smi_info *smi_info) +{ + unsigned long jiffies_now; + long time_diff; + + while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { + jiffies_now = jiffies; + time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) + * SI_USEC_PER_JIFFY); + smi_event_handler(smi_info, time_diff); + schedule_timeout_uninterruptible(1); + } +} + static void
shutdown_smi(void *send_info) { struct smi_info *smi_info = send_info; @@ -2187,16 +2201,13 @@ static void shutdown_smi(void *send_info) * in the BMC. Note that timers and CPU interrupts are off, * so no need for locks. */ - while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { - poll(smi_info); - schedule_timeout_uninterruptible(1); - } + wait_msg_processed(smi_info); + if (smi_info->handlers) disable_si_irq(smi_info); - while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { - poll(smi_info); - schedule_timeout_uninterruptible(1); - } + + wait_msg_processed(smi_info); + if (smi_info->handlers) smi_info->handlers->cleanup(smi_info->si_sm); From 9944a141e5eaaef6f2298cd3be1783b027b4a0dd Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 19 Nov 2021 09:14:12 +0100 Subject: [PATCH 119/207] mtd: spi-nor: Check for zero erase size in spi_nor_find_best_erase_type() commit 2ebc336be08160debfe27f87660cf550d710f3e9 upstream. Erase can be zeroed in spi_nor_parse_4bait() or spi_nor_init_non_uniform_erase_map(). In practice it happened with mt25qu256a, which supports 4K, 32K, 64K erases with 3b address commands, but only 4K and 64K erase with 4b address commands. 
Fixes: dc92843159a7 ("mtd: spi-nor: fix erase_type array to indicate current map conf") Signed-off-by: Alexander Sverdlin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211119081412.29732-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 0cf1a1797ea3..2e0655c0b606 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -1184,6 +1184,8 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map, continue; erase = &map->erase_type[i]; + if (!erase->size) + continue; /* Alignment is not mandatory for overlaid regions */ if (region->offset & SNOR_OVERLAID_REGION && From 3ac71fd8ffa11b78f54200f3fd988f887c658ded Mon Sep 17 00:00:00 2001 From: Huaxin Lu Date: Thu, 3 Nov 2022 00:09:49 +0800 Subject: [PATCH 120/207] ima: Fix a potential NULL pointer access in ima_restore_measurement_list commit 11220db412edae8dba58853238f53258268bdb88 upstream. In restore_template_fmt, when kstrdup fails, a non-NULL value will still be returned, which causes a NULL pointer access in template_desc_init_fields. 
Fixes: c7d09367702e ("ima: support restoring multiple template formats") Cc: stable@kernel.org Co-developed-by: Jiaming Li Signed-off-by: Jiaming Li Signed-off-by: Huaxin Lu Reviewed-by: Stefan Berger Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_template.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index 195ac18f0927..04c49f05cb74 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -340,8 +340,11 @@ static struct ima_template_desc *restore_template_fmt(char *template_name) template_desc->name = ""; template_desc->fmt = kstrdup(template_name, GFP_KERNEL); - if (!template_desc->fmt) + if (!template_desc->fmt) { + kfree(template_desc); + template_desc = NULL; goto out; + } spin_lock(&template_list); list_add_tail_rcu(&template_desc->list, &defined_templates); From 1fc9b20a7688000fcf4d7fbaa58e415a3cdda961 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Nov 2022 16:17:43 +0300 Subject: [PATCH 121/207] ipmi: fix use after free in _ipmi_destroy_user() commit a92ce570c81dc0feaeb12a429b4bc65686d17967 upstream. The intf_free() function frees the "intf" pointer so we cannot dereference it again on the next line. 
Fixes: cbb79863fc31 ("ipmi: Don't allow device module unload when in use") Signed-off-by: Dan Carpenter Message-Id: Cc: # 5.5+ Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index d5ee52be176d..5d403fb5bd92 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -1330,6 +1330,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user) unsigned long flags; struct cmd_rcvr *rcvr; struct cmd_rcvr *rcvrs = NULL; + struct module *owner; if (!acquire_ipmi_user(user, &i)) { /* @@ -1392,8 +1393,9 @@ static void _ipmi_destroy_user(struct ipmi_user *user) kfree(rcvr); } + owner = intf->owner; kref_put(&intf->refcount, intf_free); - module_put(intf->owner); + module_put(owner); } int ipmi_destroy_user(struct ipmi_user *user) From 59b6c0ea1a8dcc6ce2cca6f7135f05fd4355b9de Mon Sep 17 00:00:00 2001 From: Yaliang Wang Date: Mon, 17 Oct 2022 01:19:01 +0800 Subject: [PATCH 122/207] mtd: spi-nor: gigadevice: gd25q256: replace gd25q256_default_init with gd25q256_post_bfpt commit 4dc49062a7e9c0c7261807fb855df1c611eb78c3 upstream. When utilizing PARSE_SFDP to initialize the flash parameter, the deprecated initializing method spi_nor_init_params_deprecated() and the function spi_nor_manufacturer_init_params() within it will never be executed, which results in the default_init hook function will also never be executed. This is okay for 'D' generation of GD25Q256, because 'D' generation is implementing the JESD216B standards, it has QER field defined in BFPT, parsing the SFDP can properly set the quad_enable function. The 'E' generation also implements the JESD216B standards, and it has the same status register definitions as 'D' generation, parsing the SFDP to set the quad_enable function should also work for 'E' generation. 
However, the same thing can't apply to 'C' generation. 'C' generation 'GD25Q256C' implements the JESD216 standards, and it doesn't have the QER field defined in BFPT, since it does have QE bit in status register 1, the quad_enable hook needs to be tweaked to properly set the quad_enable function, this can be done in post_bfpt fixup hook. Fixes: 047275f7de18 ("mtd: spi-nor: gigadevice: gd25q256: Init flash based on SFDP") Reported-by: kernel test robot Signed-off-by: Yaliang Wang [tudor.ambarus@microchip.com: Update comment in gd25q256_post_bfpt] Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221016171901.1483542-2-yaliang.wang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/gigadevice.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/spi-nor/gigadevice.c b/drivers/mtd/spi-nor/gigadevice.c index 119b38e6fc2a..d57ddaf1525b 100644 --- a/drivers/mtd/spi-nor/gigadevice.c +++ b/drivers/mtd/spi-nor/gigadevice.c @@ -8,19 +8,29 @@ #include "core.h" -static void gd25q256_default_init(struct spi_nor *nor) +static int +gd25q256_post_bfpt(struct spi_nor *nor, + const struct sfdp_parameter_header *bfpt_header, + const struct sfdp_bfpt *bfpt) { /* - * Some manufacturer like GigaDevice may use different - * bit to set QE on different memories, so the MFR can't - * indicate the quad_enable method for this case, we need - * to set it in the default_init fixup hook. + * GD25Q256C supports the first version of JESD216 which does not define + * the Quad Enable methods. Overwrite the default Quad Enable method. 
+ * + * GD25Q256 GENERATION | SFDP MAJOR VERSION | SFDP MINOR VERSION + * GD25Q256C | SFDP_JESD216_MAJOR | SFDP_JESD216_MINOR + * GD25Q256D | SFDP_JESD216_MAJOR | SFDP_JESD216B_MINOR + * GD25Q256E | SFDP_JESD216_MAJOR | SFDP_JESD216B_MINOR */ - nor->params->quad_enable = spi_nor_sr1_bit6_quad_enable; + if (bfpt_header->major == SFDP_JESD216_MAJOR && + bfpt_header->minor == SFDP_JESD216_MINOR) + nor->params->quad_enable = spi_nor_sr1_bit6_quad_enable; + + return 0; } static const struct spi_nor_fixups gd25q256_fixups = { - .default_init = gd25q256_default_init, + .post_bfpt = gd25q256_post_bfpt, }; static const struct flash_info gigadevice_nor_parts[] = { From f375bcf69f58fd0744c9dfd1b6b891a27301d67b Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 2 Nov 2022 17:30:06 +0100 Subject: [PATCH 123/207] ima: Fix memory leak in __ima_inode_hash() commit 8c1d6a050a0f16e0a9d32eaf53b965c77279c6f8 upstream. Commit f3cc6b25dcc5 ("ima: always measure and audit files in policy") lets measurement or audit happen even if the file digest cannot be calculated. As a result, iint->ima_hash could have been allocated despite ima_collect_measurement() returning an error. Since ima_hash belongs to a temporary inode metadata structure, declared at the beginning of __ima_inode_hash(), just add a kfree() call if ima_collect_measurement() returns an error different from -ENOMEM (in that case, ima_hash should not have been allocated). 
Cc: stable@vger.kernel.org Fixes: 280fe8367b0d ("ima: Always return a file measurement in ima_file_hash()") Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 040b03ddc1c7..4a207a3ef7ef 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -542,8 +542,13 @@ static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf, rc = ima_collect_measurement(&tmp_iint, file, NULL, 0, ima_hash_algo, NULL); - if (rc < 0) + if (rc < 0) { + /* ima_hash could be allocated in case of failure. */ + if (rc != -ENOMEM) + kfree(tmp_iint.ima_hash); + return -EOPNOTSUPP; + } iint = &tmp_iint; mutex_lock(&iint->mutex); From d93b6a8a34e1062cf25b31ac4dc94a3d4130faca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Dec 2022 16:23:25 -0800 Subject: [PATCH 124/207] um: virt-pci: Avoid GCC non-NULL warning commit bdc77507fecd00ddad2f502f86a48a9ec38f0f84 upstream. GCC gets confused about the return value of get_cpu_var() possibly being NULL, so explicitly test for it before calls to memcpy() and memset(). 
Avoids warnings like this: arch/um/drivers/virt-pci.c: In function 'um_pci_send_cmd': include/linux/fortify-string.h:48:33: warning: argument 1 null where non-null expected [-Wnonnull] 48 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:438:9: note: in expansion of macro '__underlying_memcpy' 438 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:483:26: note: in expansion of macro '__fortify_memcpy_chk' 483 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ arch/um/drivers/virt-pci.c:100:9: note: in expansion of macro 'memcpy' 100 | memcpy(buf, cmd, cmd_size); | ^~~~~~ While at it, avoid literal "8" and use stored sizeof(buf->data) in memset() and um_pci_send_cmd(). Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202211271212.SUZSC9f9-lkp@intel.com Fixes: ba38961a069b ("um: Enable FORTIFY_SOURCE") Cc: Richard Weinberger Cc: Anton Ivanov Cc: Johannes Berg Cc: "Michael S. 
Tsirkin" Cc: Al Viro Cc: Xiu Jianfeng Cc: Vincent Whitchurch Cc: linux-um@lists.infradead.org Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/virt-pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index acb55b302b14..3ac220dafec4 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -97,7 +97,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } buf = get_cpu_var(um_pci_msg_bufs); - memcpy(buf, cmd, cmd_size); + if (buf) + memcpy(buf, cmd, cmd_size); if (posted) { u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); @@ -182,6 +183,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, struct um_pci_message_buffer *buf; u8 *data; unsigned long ret = ULONG_MAX; + size_t bytes = sizeof(buf->data); if (!dev) return ULONG_MAX; @@ -189,7 +191,8 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, buf = get_cpu_var(um_pci_msg_bufs); data = buf->data; - memset(buf->data, 0xff, sizeof(buf->data)); + if (buf) + memset(data, 0xff, bytes); switch (size) { case 1: @@ -204,7 +207,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, goto out; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8)) + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) goto out; switch (size) { From 06bc122d9b5f130ec23647a37e316215aa577c6a Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 25 Nov 2022 20:18:11 +0800 Subject: [PATCH 125/207] crypto: ccree,hisilicon - Fix dependencies to correct algorithm commit 2ae6feb1a1f6678fe11864f1b6920ed10b09ad6a upstream. 
Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moves the SM3 and SM4 stand-alone library and the algorithm implementation for the Crypto API into the same directory, and the corresponding relationship of Kconfig is modified, CONFIG_CRYPTO_SM3/4 corresponds to the stand-alone library of SM3/4, and CONFIG_CRYPTO_SM3/4_GENERIC corresponds to the algorithm implementation for the Crypto API. Therefore, it is necessary for this module to depend on the correct algorithm. Fixes: d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") Cc: Jason A. Donenfeld Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/Kconfig | 4 ++-- drivers/crypto/hisilicon/Kconfig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c30b5a39c2ac..4a618d80e106 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -790,8 +790,8 @@ config CRYPTO_DEV_CCREE select CRYPTO_ECB select CRYPTO_CTR select CRYPTO_XTS - select CRYPTO_SM4 - select CRYPTO_SM3 + select CRYPTO_SM4_GENERIC + select CRYPTO_SM3_GENERIC help Say 'Y' to enable a driver for the REE interface of the Arm TrustZone CryptoCell family of processors. Currently the diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 27e1fa912063..743ce4fc3158 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -26,7 +26,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 - select CRYPTO_SM4 + select CRYPTO_SM4_GENERIC depends on PCI && PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) From 518573988a2f14f517403db2ece5ddaefba21e94 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Wed, 26 Oct 2022 02:11:21 -0400 Subject: [PATCH 126/207] PCI: Fix pci_device_is_present() for VFs by checking PF commit 98b04dd0b4577894520493d96bc4623387767445 upstream. pci_device_is_present() previously didn't work for VFs because it reads the Vendor and Device ID, which are 0xffff for VFs, which looks like they aren't present. Check the PF instead. Wei Gong reported that if virtio I/O is in progress when the driver is unbound or "0" is written to /sys/.../sriov_numvfs, the virtio I/O operation hangs, which may result in output like this: task:bash state:D stack: 0 pid: 1773 ppid: 1241 flags:0x00004002 Call Trace: schedule+0x4f/0xc0 blk_mq_freeze_queue_wait+0x69/0xa0 blk_mq_freeze_queue+0x1b/0x20 blk_cleanup_queue+0x3d/0xd0 virtblk_remove+0x3c/0xb0 [virtio_blk] virtio_dev_remove+0x4b/0x80 ... device_unregister+0x1b/0x60 unregister_virtio_device+0x18/0x30 virtio_pci_remove+0x41/0x80 pci_device_remove+0x3e/0xb0 This happened because pci_device_is_present(VF) returned "false" in virtio_pci_remove(), so it called virtio_break_device(). The broken vq meant that vring_interrupt() skipped the vq.callback() that would have completed the virtio I/O operation via virtblk_done(). [bhelgaas: commit log, simplify to always use pci_physfn(), add stable tag] Link: https://lore.kernel.org/r/20221026060912.173250-1-mst@redhat.com Reported-by: Wei Gong Tested-by: Wei Gong Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2127aba3550b..ab615ab4e440 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6447,6 +6447,8 @@ bool pci_device_is_present(struct pci_dev *pdev) { u32 v; + /* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */ + pdev = pci_physfn(pdev); if (pci_dev_is_disconnected(pdev)) return false; return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0); From b33528863d07e7f6619264f3ce774ae414027fce Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 8 Nov 2022 17:05:59 -0600 Subject: [PATCH 127/207] PCI/sysfs: Fix double free in error path commit aa382ffa705bea9931ec92b6f3c70e1fdb372195 upstream. When pci_create_attr() fails, pci_remove_resource_files() is called, which iterates over the res_attr[_wc] arrays and frees every non-NULL entry. To avoid a double free here, set the array entry only after it's clear we successfully initialized it.
Fixes: b562ec8f74e4 ("PCI: Don't leak memory if sysfs_create_bin_file() fails") Link: https://lore.kernel.org/r/20221007070735.GX986@pengutronix.de/ Signed-off-by: Sascha Hauer Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 0a2eeb82cebd..ba38fc47d35e 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1175,11 +1175,9 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) sysfs_bin_attr_init(res_attr); if (write_combine) { - pdev->res_attr_wc[num] = res_attr; sprintf(res_attr_name, "resource%d_wc", num); res_attr->mmap = pci_mmap_resource_wc; } else { - pdev->res_attr[num] = res_attr; sprintf(res_attr_name, "resource%d", num); if (pci_resource_flags(pdev, num) & IORESOURCE_IO) { res_attr->read = pci_read_resource_io; @@ -1197,10 +1195,17 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) res_attr->size = pci_resource_len(pdev, num); res_attr->private = (void *)(unsigned long)num; retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - if (retval) + if (retval) { kfree(res_attr); + return retval; + } - return retval; + if (write_combine) + pdev->res_attr_wc[num] = res_attr; + else + pdev->res_attr[num] = res_attr; + + return 0; } /** From dc387c34d8dd10b02a333df098f8fd9bba177a45 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:57 +0800 Subject: [PATCH 128/207] RISC-V: kexec: Fix memory leak of fdt buffer commit 96df59b1ae23f5c11698c3c2159aeb2ecd4944a4 upstream. This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ 
backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store the fdt. However, it is not freed back to the system when the kexec kernel is reloaded or unloaded, which causes a memory leak. Fix it by introducing the riscv-specific function arch_kimage_file_post_load_cleanup() and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Reviewed-by: Liao Chang Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/kexec.h | 5 +++++ arch/riscv/kernel/elf_kexec.c | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index eee260e8ab30..2b56769cb530 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -39,6 +39,7 @@ crash_setup_regs(struct pt_regs *newregs, #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + void *fdt; /* For CONFIG_KEXEC_FILE */ unsigned long fdt_addr; }; @@ -62,6 +63,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +struct kimage; +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git
a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 0cb94992c15b..ff30fcb43f47 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -21,6 +21,14 @@ #include #include +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + return kexec_image_post_load_cleanup_default(image); +} + static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_elf_info *elf_info, unsigned long old_pbase, unsigned long new_pbase) @@ -298,6 +306,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, pr_err("Error add DTB kbuf ret=%d\n", ret); goto out_free_fdt; } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); goto out; From abbb887da77408892c0c8fb4cbbc2a5bb03b140e Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 7 Dec 2022 04:11:12 -0500 Subject: [PATCH 129/207] riscv: Fixup compile error with !MMU commit c528ef0888b75f673f7d48022de8d31d5b451e8c upstream. Current nommu_virt_defconfig can't compile: In file included from arch/riscv/kernel/crash_core.c:3: arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:8:27: error: 'VA_BITS' undeclared (first use in this function) 8 | VMCOREINFO_NUMBER(VA_BITS); | ^~~~~~~ Add MMU dependency for KEXEC_FILE. 
Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Reported-by: Conor Dooley Reported-by: kernel test robot Signed-off-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221207091112.2258674-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 593cf09264d8..8e5fd5682018 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -502,7 +502,7 @@ config KEXEC_FILE select KEXEC_CORE select KEXEC_ELF select HAVE_IMA_KEXEC if IMA - depends on 64BIT + depends on 64BIT && MMU help This is new version of kexec system call. This system call is file based and takes file descriptors as system call argument From cdea2da6787583ecca43594132533a2ac8d7cd21 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:58 +0800 Subject: [PATCH 130/207] RISC-V: kexec: Fix memory leak of elf header buffer commit cbc32023ddbdf4baa3d9dc513a2184a84080a5a2 upstream. This is reported by kmemleak detector: unreferenced object 0xff2000000403d000 (size 4096): comm "kexec", pid 146, jiffies 4294900633 (age 64.792s) hex dump (first 32 bytes): 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............ 04 00 f3 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000566ca97c>] kmemleak_vmalloc+0x3c/0xbe [<00000000979283d8>] __vmalloc_node_range+0x3ac/0x560 [<00000000b4b3712a>] __vmalloc_node+0x56/0x62 [<00000000854f75e2>] vzalloc+0x2c/0x34 [<00000000e9a00db9>] crash_prepare_elf64_headers+0x80/0x30c [<0000000067e8bf48>] elf_kexec_load+0x3e8/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via vzalloc() to store elf headers. 
However, it is not freed back to the system when the kdump kernel is reloaded or unloaded, or when image->elf_headers is successfully set and loading the kdump kernel then fails for some reason. Fix it by freeing the buffer in arch_kimage_file_post_load_cleanup(). Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221104095658.141222-2-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/elf_kexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index ff30fcb43f47..5372b708fae2 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -26,6 +26,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) kvfree(image->arch.fdt); image->arch.fdt = NULL; + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); } From 00777a099574ad99a07d3a6db2f62c95b420ff2e Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:36 -0500 Subject: [PATCH 131/207] riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument commit 5c3022e4a616d800cf5f4c3a981d7992179e44a1 upstream. The 'retp' is a pointer to the return address on the stack, so we must pass the current return address pointer as the 'retp' argument to ftrace_push_return_trace(), not the parent function's return address on the stack.
Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 08d11a53f39e..bcfe9eb55f80 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -58,7 +58,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } else { fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, - (unsigned long *)(fp - 8)); + &frame->ra); } } From 85292a29689a740b36ae2ff41b92bc51eb97e01f Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 29 Aug 2022 23:52:19 +0300 Subject: [PATCH 132/207] riscv: mm: notify remote harts about mmu cache updates commit 4bd1d80efb5af640f99157f39b50fb11326ce641 upstream. Current implementation of update_mmu_cache function performs local TLB flush. It does not take into account ASID information. Besides, it does not take into account other harts currently running the same mm context or possible migration of the running context to other harts. Meanwhile TLB flush is not performed for every context switch if ASID support is enabled. Patch [1] proposed to add ASID support to update_mmu_cache to avoid flushing local TLB entirely. This patch takes into account other harts currently running the same mm context as well as possible migration of this context to other harts. For this purpose the approach from flush_icache_mm is reused. Remote harts currently running the same mm context are informed via SBI calls that they need to flush their local TLBs. All the other harts are marked as needing a deferred TLB flush when this mm context runs on them. 
[1] https://lore.kernel.org/linux-riscv/20220821013926.8968-1-tjytimi@163.com/ Signed-off-by: Sergey Matyukevich Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/#t Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/mmu.h | 2 ++ arch/riscv/include/asm/pgtable.h | 2 +- arch/riscv/include/asm/tlbflush.h | 18 ++++++++++++++++++ arch/riscv/mm/context.c | 10 ++++++++++ arch/riscv/mm/tlbflush.c | 28 +++++++++++----------------- 5 files changed, 42 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 0099dc116168..5ff1f19fd45c 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,8 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; + /* A local tlb flush is needed before user execution can resume. */ + cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 92ec2d9d7273..ec6fb83349ce 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -415,7 +415,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. 
*/ - local_flush_tlb_page(address); + flush_tlb_page(vma, address); } static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea..907b9efd39a8 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 7acbfbd14557..80ce9caba8d2 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -196,6 +196,16 @@ switch_mm_fast: if (need_flush_tlb) local_flush_tlb_all(); +#ifdef CONFIG_SMP + else { + cpumask_t *mask = &mm->context.tlb_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + local_flush_tlb_all_asid(cntx & asid_mask); + } + } +#endif } static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 37ed760d007c..ce7dfc81bb3f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,23 +5,7 @@ #include #include #include - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" 
(asid) - : "memory"); -} +#include <asm/tlbflush.h> void flush_tlb_all(void) { @@ -31,6 +15,7 @@ void flush_tlb_all(void) static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { + struct cpumask *pmask = &mm->context.tlb_stale_mask; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -44,6 +29,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, if (static_branch_unlikely(&use_asid_allocator)) { unsigned long asid = atomic_long_read(&mm->context.id); + /* + * TLB will be immediately flushed on harts concurrently + * executing this MM context. TLB flush on other harts + * is deferred until this MM context migrates there. + */ + cpumask_setall(pmask); + cpumask_clear_cpu(cpuid, pmask); + cpumask_andnot(pmask, pmask, cmask); + if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); } else if (size <= stride) { From b70acb182f45544aa66f6c343d1faac4a3e5e49a Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 6 Oct 2022 04:34:19 +0000 Subject: [PATCH 133/207] crypto: n2 - add missing hash statesize commit 76a4e874593543a2dff91d249c95bac728df2774 upstream. Add missing statesize to hash templates. This is mandatory; otherwise no algorithms can be registered, as the core requires statesize to be set.
CC: stable@kernel.org # 4.3+ Reported-by: Rolf Eike Beer Tested-by: Rolf Eike Beer Fixes: 0a625fd2abaa ("crypto: n2 - Add Niagara2 crypto driver") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/n2_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 31e24df18877..20d0dcd50344 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1229,6 +1229,7 @@ struct n2_hash_tmpl { const u8 *hash_init; u8 hw_op_hashsz; u8 digest_size; + u8 statesize; u8 block_size; u8 auth_type; u8 hmac_type; @@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_MD5, .hw_op_hashsz = MD5_DIGEST_SIZE, .digest_size = MD5_DIGEST_SIZE, + .statesize = sizeof(struct md5_state), .block_size = MD5_HMAC_BLOCK_SIZE }, { .name = "sha1", .hash_zero = sha1_zero_message_hash, @@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_SHA1, .hw_op_hashsz = SHA1_DIGEST_SIZE, .digest_size = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), .block_size = SHA1_BLOCK_SIZE }, { .name = "sha256", .hash_zero = sha256_zero_message_hash, @@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_SHA256, .hw_op_hashsz = SHA256_DIGEST_SIZE, .digest_size = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), .block_size = SHA256_BLOCK_SIZE }, { .name = "sha224", .hash_zero = sha224_zero_message_hash, @@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_RESERVED, .hw_op_hashsz = SHA256_DIGEST_SIZE, .digest_size = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), .block_size = SHA224_BLOCK_SIZE }, }; #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls) @@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl) halg = &ahash->halg; halg->digestsize = 
tmpl->digest_size; + halg->statesize = tmpl->statesize; base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); From cced6678325bd4b400018e0e616f4cd98179f784 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 28 Sep 2022 13:45:05 -0500 Subject: [PATCH 134/207] crypto: ccp - Add support for TEE for PCI ID 0x14CA commit 10da230a4df1dfe32a58eb09246f5ffe82346f27 upstream. SoCs containing 0x14CA are present both in datacenter parts that support SEV as well as client parts that support TEE. Cc: stable@vger.kernel.org # 5.15+ Tested-by: Rijo-john Thomas Signed-off-by: Mario Limonciello Acked-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/sp-pci.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 792d6da7f0c0..084d052fddcc 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -381,6 +381,15 @@ static const struct psp_vdata pspv3 = { .inten_reg = 0x10690, .intsts_reg = 0x10694, }; + +static const struct psp_vdata pspv4 = { + .sev = &sevv2, + .tee = &teev1, + .feature_reg = 0x109fc, + .inten_reg = 0x10690, + .intsts_reg = 0x10694, +}; + #endif static const struct sp_dev_vdata dev_vdata[] = { @@ -426,7 +435,7 @@ static const struct sp_dev_vdata dev_vdata[] = { { /* 5 */ .bar = 2, #ifdef CONFIG_CRYPTO_DEV_SP_PSP - .psp_vdata = &pspv2, + .psp_vdata = &pspv4, #endif }, { /* 6 */ From 878d15dd43bcb2d7d9f807b0cc57ece7d23c7e29 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 20 Sep 2022 17:14:13 -0700 Subject: [PATCH 135/207] driver core: Fix bus_type.match() error handling in __driver_attach() commit 27c0d217340e47ec995557f61423ef415afba987 upstream. When a driver registers with a bus, it will attempt to match with every device on the bus through the __driver_attach() function. 
Currently, if the bus_type.match() function encounters an error that is not -EPROBE_DEFER, __driver_attach() will return a negative error code, which causes the driver registration logic to stop trying to match with the remaining devices on the bus. This behavior is not correct; a failure while matching a driver to a device does not mean that the driver won't be able to match and bind with other devices on the bus. Update the logic in __driver_attach() to reflect this. Fixes: 656b8035b0ee ("ARM: 8524/1: driver cohandle -EPROBE_DEFER from bus_type.match()") Cc: stable@vger.kernel.org Cc: Saravana Kannan Signed-off-by: Isaac J. Manjarres Link: https://lore.kernel.org/r/20220921001414.4046492-1-isaacmanjarres@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 3dda62503102..9ae2b5c4fc49 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -1162,7 +1162,11 @@ static int __driver_attach(struct device *dev, void *data) return 0; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); - return ret; + /* + * Driver could not match with device, but may match with + * another device on the bus. + */ + return 0; } /* ret > 0 means positive match */ if (driver_allows_async_probing(drv)) { From 0a1129c4b445e1a2b82c915c5cd932442539b2fc Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Sun, 16 Oct 2022 11:05:32 +0800 Subject: [PATCH 136/207] bus: mhi: host: Fix race between channel preparation and M0 event commit 869a99907faea6d1835b0bd0d0422ae3519c6ea9 upstream. There is a race condition where mhi_prepare_channel() updates the read and write pointers as the base address and in parallel, if an M0 transition occurs, the tasklet goes ahead and rings doorbells for all channels with a delta in TRE rings assuming they are already enabled. This causes a null pointer access. 
Fix it by adding a channel enabled check before ringing channel doorbells. Cc: stable@vger.kernel.org # 5.19 Fixes: a6e2e3522f29 "bus: mhi: core: Add support for PM state transitions" Signed-off-by: Qiang Yu Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1665889532-13634-1-git-send-email-quic_qianyu@quicinc.com [mani: CCed stable list] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/host/pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c index 4a42186ff111..083459028a4b 100644 --- a/drivers/bus/mhi/host/pm.c +++ b/drivers/bus/mhi/host/pm.c @@ -301,7 +301,8 @@ int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl) read_lock_irq(&mhi_chan->lock); /* Only ring DB if ring is not empty */ - if (tre_ring->base && tre_ring->wp != tre_ring->rp) + if (tre_ring->base && tre_ring->wp != tre_ring->rp && + mhi_chan->ch_state == MHI_CH_STATE_ENABLED) mhi_ring_chan_db(mhi_cntrl, mhi_chan); read_unlock_irq(&mhi_chan->lock); } From 1066b58cc02d223ee7b83f8cbde43b1efda56bed Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:42 +0100 Subject: [PATCH 137/207] phy: qcom-qmp-combo: fix sdm845 reset commit e965ab8216a419fadb4520b65a95dc7017daa800 upstream. The SDM845 has two resets but the DP configuration erroneously described only one. In case the DP part of the PHY is initialised before the USB part (e.g. depending on probe order), then only the first reset would be asserted. Add a dedicated configuration for SDM845 rather than reuse the incompatible SC7180 configuration. 
Fixes: d88497fb6bbd ("phy: qualcomm: phy-qcom-qmp: add support for combo USB3+DP phy on SDM845") Cc: stable@vger.kernel.org # 6.1 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 39 ++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8e46d7a66edd..1f1947476803 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1121,9 +1121,46 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; +static const struct qmp_phy_cfg sdm845_dpphy_cfg = { + .type = PHY_TYPE_DP, + .lanes = 2, + + .serdes_tbl = qmp_v3_dp_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .tx_tbl = qmp_v3_dp_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + + .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v3_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v3_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v3_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr3), + + .swing_hbr_rbr = &qmp_dp_v3_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v3_pre_emphasis_hbr_rbr, + .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, + .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, + + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs 
= qmp_v3_usb3phy_regs_layout, + + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, +}; + static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { .usb_cfg = &sdm845_usb3phy_cfg, - .dp_cfg = &sc7180_dpphy_cfg, + .dp_cfg = &sdm845_dpphy_cfg, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { From fdbbb40583195c1b060a1bb26dd1beaf85ea02f9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:43 +0100 Subject: [PATCH 138/207] phy: qcom-qmp-combo: fix sc8180x reset commit 910dd4883d757af5faac92590f33f0f7da963032 upstream. The SC8180X has two resets but the DP configuration erroneously described only one. In case the DP part of the PHY is initialised before the USB part (e.g. depending on probe order), then only the first reset would be asserted. Fixes: 1633802cd4ac ("phy: qcom: qmp: Add SC8180x USB/DP combo") Cc: stable@vger.kernel.org # 5.15 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 1f1947476803..91f8ee79000d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1221,8 +1221,8 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = 
qmp_v3_usb3phy_regs_layout, From 808948f4922f6b0bc49547cd8e928b67b8a0b47e Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:37 -0500 Subject: [PATCH 139/207] iommu/amd: Fix ivrs_acpihid cmdline parsing code commit 5f18e9f8868c6d4eae71678e7ebd4977b7d8c8cf upstream. The second (UID) strcmp in acpi_dev_hid_uid_match considers "0" and "00" different, which can prevent device registration. Have the AMD IOMMU driver's ivrs_acpihid parsing code remove any leading zeroes to make the UID strcmp succeed. Now users can safely specify "AMDxxxxx:00" or "AMDxxxxx:0" and expect the same behaviour. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Signed-off-by: Kim Phillips Cc: stable@vger.kernel.org Cc: Suravee Suthikulpanit Cc: Joerg Roedel Link: https://lore.kernel.org/r/20220919155638.391481-1-kim.phillips@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd/init.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1a2d425bf568..d14da30b8706 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3488,6 +3488,13 @@ static int __init parse_ivrs_acpihid(char *str) return 1; } + /* + * Ignore leading zeroes after ':', so e.g., AMDI0095:00 + * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match + */ + while (*uid == '0' && *(uid + 1)) + uid++; + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From 7e8834776c3706c04c77dcfcb5556e1b2be10323 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:38 -0500 Subject: [PATCH 140/207] iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options commit 1198d2316dc4265a97d0e8445a22c7a6d17580a4 upstream. 
Currently, these options cause the following libkmod error: libkmod: ERROR ../libkmod/libkmod-config.c:489 kcmdline_parse_result: \ Ignoring bad option on kernel command line while parsing module \ name: 'ivrs_xxxx[XX:XX' Fix by introducing a new parameter format for these options and throw a warning for the deprecated format. Users are still allowed to omit the PCI Segment if zero. Adding a Link: to the reason why we're modding the syntax parsing in the driver and not in libkmod. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-modules/20200310082308.14318-2-lucas.demarchi@intel.com/ Reported-by: Kim Phillips Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Kim Phillips Link: https://lore.kernel.org/r/20220919155638.391481-2-kim.phillips@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 27 +++++-- drivers/iommu/amd/init.c | 77 +++++++++++++------ 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 42af9ca0127e..6b838869554b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2300,7 +2300,13 @@ Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map IOAPIC-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_ioapic=10@0001:00:14.0 + + Deprecated formats: * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 @@ -2312,7 +2318,13 @@ Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. 
- For example: + + For example, to map HPET-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_hpet=10@0001:00:14.0 + + Deprecated formats: * To map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 @@ -2323,15 +2335,20 @@ ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. For example, to map UART-HID:UID AMD0020:0 to PCI segment 0x1 and PCI device ID 00:14.5, write the parameter as: - ivrs_acpihid[0001:00:14.5]=AMD0020:0 + ivrs_acpihid=AMD0020:0@0001:00:14.5 - By default, PCI segment is 0, and can be omitted. - For example, PCI device 00:14.5 write the parameter as: + Deprecated formats: + * To map UART-HID:UID AMD0020:0 to PCI segment is 0, + PCI device ID 00:14.5, write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 + * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and + PCI device ID 00:14.5, write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick See Documentation/input/joydev/joystick.rst. 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d14da30b8706..34029d116107 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3402,18 +3402,24 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + +found: if (early_ioapic_map_size == EARLY_MAP_SIZE) { pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", str); @@ -3434,18 +3440,24 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_hpet%s option format deprecated; use 
ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + +found: if (early_hpet_map_size == EARLY_MAP_SIZE) { pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", str); @@ -3466,19 +3478,36 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; - char *hid, *uid, *p; + char *hid, *uid, *p, *addr; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; - int ret, i; + int i; - ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); - if (ret != 4) { - ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); - if (ret != 5) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + addr = strchr(str, '@'); + if (!addr) { + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || + sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { + pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", + str, acpiid, seg, bus, dev, fn); + goto found; } + goto not_found; } + /* We have the '@', make it the terminator to get just the acpiid */ + *addr++ = 0; + + if (sscanf(str, "=%s", acpiid) != 1) + goto not_found; + + if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || + sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) + goto found; + +not_found: + pr_err("Invalid command line: ivrs_acpihid%s\n", str); + return 1; + +found: p = acpiid; hid = strsep(&p, ":"); uid = p; From ee29001a637fda7e3025487f24653494b2a196b5 Mon Sep 17 00:00:00 2001 From: Li Hua Date: Mon, 21 Nov 2022 11:06:20 +0800 Subject: [PATCH 141/207] test_kprobes: Fix implicit declaration error of test_kprobes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 63a4dc0a0bb0e9bfeb2c88ccda81abdde4cdd6b8 upstream. 
If KPROBES_SANITY_TEST and ARCH_CORRECT_STACKTRACE_ON_KRETPROBE is enabled, but STACKTRACE is not set. Build failed as below: lib/test_kprobes.c: In function ‘stacktrace_return_handler’: lib/test_kprobes.c:228:8: error: implicit declaration of function ‘stack_trace_save’; did you mean ‘stacktrace_driver’? [-Werror=implicit-function-declaration] ret = stack_trace_save(stack_buf, STACK_BUF_SIZE, 0); ^~~~~~~~~~~~~~~~ stacktrace_driver cc1: all warnings being treated as errors scripts/Makefile.build:250: recipe for target 'lib/test_kprobes.o' failed make[2]: *** [lib/test_kprobes.o] Error 1 To fix this error, Select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE is enabled. Link: https://lore.kernel.org/all/20221121030620.63181-1-hucool.lihua@huawei.com/ Fixes: 1f6d3a8f5e39 ("kprobes: Add a test case for stacktrace from kretprobe handler") Cc: stable@vger.kernel.org Signed-off-by: Li Hua Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3638b3424be5..12dfe6691dd5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2092,6 +2092,7 @@ config TEST_MIN_HEAP config TEST_SORT tristate "Array-based sort test" if !KUNIT_ALL_TESTS depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option enables the self-test function of 'sort()' at boot, From 17183187dc862a828f8e54380d0596eafa0b09f8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 12 Dec 2022 15:50:41 -0800 Subject: [PATCH 142/207] hugetlb: really allocate vma lock for all sharable vmas commit e700898fa075c69b3ae02b702ab57fb75e1a82ec upstream. Commit bbff39cc6cbc ("hugetlb: allocate vma lock for all sharable vmas") removed the pmd sharable checks in the vma lock helper routines. 
However, it left the functional version of helper routines behind #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE. Therefore, the vma lock is not being used for sharable vmas on architectures that do not support pmd sharing. On these architectures, a potential fault/truncation race is exposed that could leave pages in a hugetlb file past i_size until the file is removed. Move the functional vma lock helpers outside the ifdef, and remove the non-functional stubs. Since the vma lock is not just for pmd sharing, rename the routine __vma_shareable_flags_pmd. Link: https://lkml.kernel.org/r/20221212235042.178355-1-mike.kravetz@oracle.com Fixes: bbff39cc6cbc ("hugetlb: allocate vma lock for all sharable vmas") Signed-off-by: Mike Kravetz Reviewed-by: Miaohe Lin Cc: "Aneesh Kumar K.V" Cc: David Hildenbrand Cc: James Houghton Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 333 +++++++++++++++++++++++---------------------------- 1 file changed, 148 insertions(+), 185 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e36ca75311a5..9c251faeb6f5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -255,6 +255,152 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) return subpool_inode(file_inode(vma->vm_file)); } +/* + * hugetlb vma_lock helper routines + */ +static bool __vma_shareable_lock(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && + vma->vm_private_data; +} + +void hugetlb_vma_lock_read(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_read(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_unlock_read(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + up_read(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_lock_write(struct vm_area_struct *vma) 
+{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_write(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_unlock_write(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + up_write(&vma_lock->rw_sema); + } +} + +int hugetlb_vma_trylock_write(struct vm_area_struct *vma) +{ + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + if (!__vma_shareable_lock(vma)) + return 1; + + return down_write_trylock(&vma_lock->rw_sema); +} + +void hugetlb_vma_assert_locked(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + lockdep_assert_held(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_lock_release(struct kref *kref) +{ + struct hugetlb_vma_lock *vma_lock = container_of(kref, + struct hugetlb_vma_lock, refs); + + kfree(vma_lock); +} + +static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) +{ + struct vm_area_struct *vma = vma_lock->vma; + + /* + * vma_lock structure may or not be released as a result of put, + * it certainly will no longer be attached to vma so clear pointer. + * Semaphore synchronizes access to vma_lock->vma field. + */ + vma_lock->vma = NULL; + vma->vm_private_data = NULL; + up_write(&vma_lock->rw_sema); + kref_put(&vma_lock->refs, hugetlb_vma_lock_release); +} + +static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + __hugetlb_vma_unlock_write_put(vma_lock); + } +} + +static void hugetlb_vma_lock_free(struct vm_area_struct *vma) +{ + /* + * Only present in sharable vmas. 
+ */ + if (!vma || !__vma_shareable_lock(vma)) + return; + + if (vma->vm_private_data) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_write(&vma_lock->rw_sema); + __hugetlb_vma_unlock_write_put(vma_lock); + } +} + +static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +{ + struct hugetlb_vma_lock *vma_lock; + + /* Only establish in (flags) sharable vmas */ + if (!vma || !(vma->vm_flags & VM_MAYSHARE)) + return; + + /* Should never get here with non-NULL vm_private_data */ + if (vma->vm_private_data) + return; + + vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); + if (!vma_lock) { + /* + * If we can not allocate structure, then vma can not + * participate in pmd sharing. This is only a possible + * performance enhancement and memory saving issue. + * However, the lock is also used to synchronize page + * faults with truncation. If the lock is not present, + * unlikely races could leave pages in a file past i_size + * until the file is removed. Warn in the unlikely case of + * allocation failure. + */ + pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); + return; + } + + kref_init(&vma_lock->refs); + init_rwsem(&vma_lock->rw_sema); + vma_lock->vma = vma; + vma->vm_private_data = vma_lock; +} + /* Helper that removes a struct file_region from the resv_map cache and returns * it for use. 
*/ @@ -6557,7 +6703,8 @@ bool hugetlb_reserve_pages(struct inode *inode, } /* - * vma specific semaphore used for pmd sharing synchronization + * vma specific semaphore used for pmd sharing and fault/truncation + * synchronization */ hugetlb_vma_lock_alloc(vma); @@ -6813,149 +6960,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, *end = ALIGN(*end, PUD_SIZE); } -static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma) -{ - return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && - vma->vm_private_data; -} - -void hugetlb_vma_lock_read(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_read(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_unlock_read(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - up_read(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_lock_write(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_write(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_unlock_write(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - up_write(&vma_lock->rw_sema); - } -} - -int hugetlb_vma_trylock_write(struct vm_area_struct *vma) -{ - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - if (!__vma_shareable_flags_pmd(vma)) - return 1; - - return down_write_trylock(&vma_lock->rw_sema); -} - -void hugetlb_vma_assert_locked(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - lockdep_assert_held(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_lock_release(struct kref *kref) -{ - struct hugetlb_vma_lock *vma_lock = container_of(kref, - struct hugetlb_vma_lock, refs); - - kfree(vma_lock); -} - -static void 
__hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) -{ - struct vm_area_struct *vma = vma_lock->vma; - - /* - * vma_lock structure may or not be released as a result of put, - * it certainly will no longer be attached to vma so clear pointer. - * Semaphore synchronizes access to vma_lock->vma field. - */ - vma_lock->vma = NULL; - vma->vm_private_data = NULL; - up_write(&vma_lock->rw_sema); - kref_put(&vma_lock->refs, hugetlb_vma_lock_release); -} - -static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - __hugetlb_vma_unlock_write_put(vma_lock); - } -} - -static void hugetlb_vma_lock_free(struct vm_area_struct *vma) -{ - /* - * Only present in sharable vmas. - */ - if (!vma || !__vma_shareable_flags_pmd(vma)) - return; - - if (vma->vm_private_data) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_write(&vma_lock->rw_sema); - __hugetlb_vma_unlock_write_put(vma_lock); - } -} - -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ - struct hugetlb_vma_lock *vma_lock; - - /* Only establish in (flags) sharable vmas */ - if (!vma || !(vma->vm_flags & VM_MAYSHARE)) - return; - - /* Should never get here with non-NULL vm_private_data */ - if (vma->vm_private_data) - return; - - vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); - if (!vma_lock) { - /* - * If we can not allocate structure, then vma can not - * participate in pmd sharing. This is only a possible - * performance enhancement and memory saving issue. - * However, the lock is also used to synchronize page - * faults with truncation. If the lock is not present, - * unlikely races could leave pages in a file past i_size - * until the file is removed. Warn in the unlikely case of - * allocation failure. 
- */ - pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); - return; - } - - kref_init(&vma_lock->refs); - init_rwsem(&vma_lock->rw_sema); - vma_lock->vma = vma; - vma->vm_private_data = vma_lock; -} - /* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the @@ -7044,47 +7048,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ -void hugetlb_vma_lock_read(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_unlock_read(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_lock_write(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_unlock_write(struct vm_area_struct *vma) -{ -} - -int hugetlb_vma_trylock_write(struct vm_area_struct *vma) -{ - return 1; -} - -void hugetlb_vma_assert_locked(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_lock_release(struct kref *kref) -{ -} - -static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) -{ -} - -static void hugetlb_vma_lock_free(struct vm_area_struct *vma) -{ -} - -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ -} - pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { From b9693304b7133b81741add5bfb56f022596df012 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 30 Sep 2022 15:50:16 +0800 Subject: [PATCH 143/207] remoteproc: imx_dsp_rproc: Add mutex protection for workqueue commit 47e6ab07018edebf94ce873cf50a05ec76ff2dde upstream. The workqueue may execute late even after remoteproc is stopped or stopping, some resources (rpmsg device and endpoint) have been released in rproc_stop_subdevices(), then rproc_vq_interrupt() accessing these resources will cause kernel dump.
Call trace: virtqueue_add_split+0x1ac/0x560 virtqueue_add_inbuf+0x4c/0x60 rpmsg_recv_done+0x15c/0x294 vring_interrupt+0x6c/0xa4 rproc_vq_interrupt+0x30/0x50 imx_dsp_rproc_vq_work+0x24/0x40 [imx_dsp_rproc] process_one_work+0x1d0/0x354 worker_thread+0x13c/0x470 kthread+0x154/0x160 ret_from_fork+0x10/0x20 Add mutex protection in imx_dsp_rproc_vq_work(), if the state is not running, then just skip calling rproc_vq_interrupt(). Also the flush workqueue operation can't be added in rproc stop for the same reason. The call sequence is rproc_shutdown -> rproc_stop ->rproc_stop_subdevices ->rproc->ops->stop() ->imx_dsp_rproc_stop ->flush_work -> rproc_vq_interrupt The resource needed by rproc_vq_interrupt has been released in rproc_stop_subdevices, so flush_work is not safe to be called in imx_dsp_rproc_stop. Fixes: ec0e5549f358 ("remoteproc: imx_dsp_rproc: Add remoteproc driver for DSP on i.MX") Signed-off-by: Shengjiu Wang Reviewed-by: Peng Fan Cc: stable Link: https://lore.kernel.org/r/1664524216-19949-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/imx_dsp_rproc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c index 899aa8dd12f0..95da1cbefacf 100644 --- a/drivers/remoteproc/imx_dsp_rproc.c +++ b/drivers/remoteproc/imx_dsp_rproc.c @@ -347,9 +347,6 @@ static int imx_dsp_rproc_stop(struct rproc *rproc) struct device *dev = rproc->dev.parent; int ret = 0; - /* Make sure work is finished */ - flush_work(&priv->rproc_work); - if (rproc->state == RPROC_CRASHED) { priv->flags &= ~REMOTE_IS_READY; return 0; @@ -432,9 +429,18 @@ static void imx_dsp_rproc_vq_work(struct work_struct *work) { struct imx_dsp_rproc *priv = container_of(work, struct imx_dsp_rproc, rproc_work); + struct rproc *rproc = priv->rproc; + + mutex_lock(&rproc->lock); + + if (rproc->state != RPROC_RUNNING) + goto unlock_mutex; 
rproc_vq_interrupt(priv->rproc, 0); rproc_vq_interrupt(priv->rproc, 1); + +unlock_mutex: + mutex_unlock(&rproc->lock); } /** From e2e8d55f044e7ccece277609b1fcf8577f1aced8 Mon Sep 17 00:00:00 2001 From: Maria Yu Date: Tue, 6 Dec 2022 09:59:57 +0800 Subject: [PATCH 144/207] remoteproc: core: Do pm_relax when in RPROC_OFFLINE state commit 11c7f9e3131ad14b27a957496088fa488b153a48 upstream. Make sure that pm_relax() happens even when the remoteproc is stopped before the crash handler work is scheduled. Signed-off-by: Maria Yu Cc: stable Fixes: a781e5aa5911 ("remoteproc: core: Prevent system suspend during remoteproc recovery") Link: https://lore.kernel.org/r/20221206015957.2616-2-quic_aiquny@quicinc.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/remoteproc_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index cb1d414a2389..c3f194d9384d 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1868,12 +1868,18 @@ static void rproc_crash_handler_work(struct work_struct *work) mutex_lock(&rproc->lock); - if (rproc->state == RPROC_CRASHED || rproc->state == RPROC_OFFLINE) { + if (rproc->state == RPROC_CRASHED) { /* handle only the first crash detected */ mutex_unlock(&rproc->lock); return; } + if (rproc->state == RPROC_OFFLINE) { + /* Don't recover if the remote processor was stopped */ + mutex_unlock(&rproc->lock); + goto out; + } + rproc->state = RPROC_CRASHED; dev_err(dev, "handling crash #%u in %s\n", ++rproc->crash_cnt, rproc->name); @@ -1883,6 +1889,7 @@ static void rproc_crash_handler_work(struct work_struct *work) if (!rproc->recovery_disabled) rproc_trigger_recovery(rproc); +out: pm_relax(rproc->dev.parent); } From dc88a50bc3398d229ffd20383b2b1cbadbfa2c38 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 2 Nov 2022 19:14:10 +0800 Subject: [PATCH 145/207] remoteproc: 
imx_rproc: Correct i.MX93 DRAM mapping commit ee18f2715e85f4ef051851a0c4831ee7ad7d83b3 upstream. According to the updated reference manual, the M33 DRAM view of 0x[C,D]0000000 maps to A55 0xC0000000, so correct it. Fixes: 9222fabf0e39 ("remoteproc: imx_rproc: Support i.MX93") Signed-off-by: Peng Fan Cc: stable Link: https://lore.kernel.org/r/20221102111410.38737-1-peng.fan@oss.nxp.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/imx_rproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 7cc4fd207e2d..596e1440cca5 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -113,8 +113,8 @@ static const struct imx_rproc_att imx_rproc_att_imx93[] = { { 0x80000000, 0x80000000, 0x10000000, 0 }, { 0x90000000, 0x80000000, 0x10000000, 0 }, - { 0xC0000000, 0xa0000000, 0x10000000, 0 }, - { 0xD0000000, 0xa0000000, 0x10000000, 0 }, + { 0xC0000000, 0xC0000000, 0x10000000, 0 }, + { 0xD0000000, 0xC0000000, 0x10000000, 0 }, }; static const struct imx_rproc_att imx_rproc_att_imx8mn[] = { From 67c98fec87ed76b1feb2ae810051afd88dfa9df6 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 10:45:14 +0800 Subject: [PATCH 146/207] parisc: led: Fix potential null-ptr-deref in start_task() commit 41f563ab3c33698bdfc3403c7c2e6c94e73681e4 upstream. start_task() calls create_singlethread_workqueue() but does not check its return value, which may be NULL. A null-ptr-deref may then happen: start_task() create_singlethread_workqueue() # failed, led_wq is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the return value and return -ENOMEM if it is NULL.
Fixes: 3499495205a6 ("[PARISC] Use work queue in LED/LCD driver instead of tasklet.") Signed-off-by: Shang XiaoJing Signed-off-by: Helge Deller Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/led.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index d4be9d2ee74d..8bdc5e043831 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -137,6 +137,9 @@ static int start_task(void) /* Create the work queue and queue the LED task */ led_wq = create_singlethread_workqueue("led_wq"); + if (!led_wq) + return -ENOMEM; + queue_delayed_work(led_wq, &led_task, 0); return 0; From 6bbba171235e1887acd853fbf4db51b6f85adb70 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 Dec 2022 17:45:40 +0100 Subject: [PATCH 147/207] parisc: Drop locking in pdc console code commit 7dc4dbfe750e1f18c511e73c8ed114da8de9ff85 upstream. No need to have specific locking for console I/O since the PDC functions provide an own locking. Signed-off-by: Helge Deller Cc: # 6.1+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/pdc_cons.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 7d0989f523d0..cf3bf8232374 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -12,37 +12,27 @@ #include /* for PAGE0 */ #include /* for iodc_call() proto and friends */ -static DEFINE_SPINLOCK(pdc_console_lock); - static void pdc_console_write(struct console *co, const char *s, unsigned count) { int i = 0; - unsigned long flags; - spin_lock_irqsave(&pdc_console_lock, flags); do { i += pdc_iodc_print(s + i, count - i); } while (i < count); - spin_unlock_irqrestore(&pdc_console_lock, flags); } #ifdef CONFIG_KGDB static int kgdb_pdc_read_char(void) { - int c; - unsigned long flags; - - spin_lock_irqsave(&pdc_console_lock, flags); - c = pdc_iodc_getc(); - spin_unlock_irqrestore(&pdc_console_lock, flags); + int c = 
pdc_iodc_getc(); return (c <= 0) ? NO_POLL_CHAR : c; } static void kgdb_pdc_write_char(u8 chr) { - if (PAGE0->mem_cons.cl_class != CL_DUPLEX) - pdc_console_write(NULL, &chr, 1); + /* no need to print char as it's shown on standard console */ + /* pdc_iodc_print(&chr, 1); */ } static struct kgdb_io kgdb_pdc_io_ops = { From 553bc5890ed96a8d006224c3a4673c47fee0d12a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 26 Nov 2022 21:29:31 +0100 Subject: [PATCH 148/207] parisc: Fix locking in pdc_iodc_print() firmware call commit 7236aae5f81f3efbd93d0601e74fc05994bc2580 upstream. Utilize pdc_lock spinlock to protect parallel modifications of the iodc_dbuf[] buffer, check length to prevent buffer overflow of iodc_dbuf[], drop the iodc_retbuf[] buffer and fix some wrong indentings. Signed-off-by: Helge Deller Cc: # 6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/firmware.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 6a7e315bcc2e..a115315d88e6 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1288,9 +1288,8 @@ void pdc_io_reset_devices(void) #endif /* defined(BOOTLOADER) */ -/* locked by pdc_console_lock */ -static int __attribute__((aligned(8))) iodc_retbuf[32]; -static char __attribute__((aligned(64))) iodc_dbuf[4096]; +/* locked by pdc_lock */ +static char iodc_dbuf[4096] __page_aligned_bss; /** * pdc_iodc_print - Console print using IODC. 
@@ -1307,6 +1306,9 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) unsigned int i; unsigned long flags; + count = min_t(unsigned int, count, sizeof(iodc_dbuf)); + + spin_lock_irqsave(&pdc_lock, flags); for (i = 0; i < count;) { switch(str[i]) { case '\n': @@ -1322,12 +1324,11 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) } print: - spin_lock_irqsave(&pdc_lock, flags); - real32_call(PAGE0->mem_cons.iodc_io, - (unsigned long)PAGE0->mem_cons.hpa, ENTRY_IO_COUT, - PAGE0->mem_cons.spa, __pa(PAGE0->mem_cons.dp.layers), - __pa(iodc_retbuf), 0, __pa(iodc_dbuf), i, 0); - spin_unlock_irqrestore(&pdc_lock, flags); + real32_call(PAGE0->mem_cons.iodc_io, + (unsigned long)PAGE0->mem_cons.hpa, ENTRY_IO_COUT, + PAGE0->mem_cons.spa, __pa(PAGE0->mem_cons.dp.layers), + __pa(pdc_result), 0, __pa(iodc_dbuf), i, 0); + spin_unlock_irqrestore(&pdc_lock, flags); return i; } @@ -1354,10 +1355,11 @@ int pdc_iodc_getc(void) real32_call(PAGE0->mem_kbd.iodc_io, (unsigned long)PAGE0->mem_kbd.hpa, ENTRY_IO_CIN, PAGE0->mem_kbd.spa, __pa(PAGE0->mem_kbd.dp.layers), - __pa(iodc_retbuf), 0, __pa(iodc_dbuf), 1, 0); + __pa(pdc_result), 0, __pa(iodc_dbuf), 1, 0); ch = *iodc_dbuf; - status = *iodc_retbuf; + /* like convert_to_wide() but for first return value only: */ + status = *(int *)&pdc_result; spin_unlock_irqrestore(&pdc_lock, flags); if (status == 0) From d97a584e350dcbe67ab2ee95b43dd7c91cc48235 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 Dec 2022 20:05:43 +0100 Subject: [PATCH 149/207] parisc: Add missing FORCE prerequisites in Makefile commit 9086e6017957c5cd6ea28d94b70e0d513d6b7800 upstream. Fix those make warnings: arch/parisc/kernel/vdso32/Makefile:30: FORCE prerequisite is missing arch/parisc/kernel/vdso64/Makefile:30: FORCE prerequisite is missing Add the missing FORCE prerequisites for all build targets identified by "make help". 
Fixes: e1f86d7b4b2a5213 ("kbuild: warn if FORCE is missing for if_changed(_dep,_rule) and filechk") Signed-off-by: Helge Deller Cc: # 5.18+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/vdso32/Makefile | 4 ++-- arch/parisc/kernel/vdso64/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile index 85b1c6d261d1..4459a48d2303 100644 --- a/arch/parisc/kernel/vdso32/Makefile +++ b/arch/parisc/kernel/vdso32/Makefile @@ -26,7 +26,7 @@ $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE # Force dependency (incbin is bad) # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) FORCE $(call if_changed,vdso32ld) # assembly rules for the .S files @@ -38,7 +38,7 @@ $(obj-cvdso32): %.o: %.c FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile index a30f5ec5eb4b..f3d6045793f4 100644 --- a/arch/parisc/kernel/vdso64/Makefile +++ b/arch/parisc/kernel/vdso64/Makefile @@ -26,7 +26,7 @@ $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE # Force dependency (incbin is bad) # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) FORCE $(call if_changed,vdso64ld) # assembly rules for the .S files @@ -35,7 +35,7 @@ $(obj-vdso64): %.o: %.S FORCE # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) 
$(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< From 790aba4492d62a3cbc932d5bb7904b0217428ec6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 26 Nov 2022 21:35:29 +0100 Subject: [PATCH 150/207] parisc: Drop duplicate kgdb_pdc console commit 7e6652c79ecd74e1112500668d956367dc3772a5 upstream. The kgdb console is already implemented and registered in pdc_cons.c, so the duplicate code can be dropped. Signed-off-by: Helge Deller Cc: # 6.1+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/kgdb.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index ab7620f695be..b16fa9bac5f4 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -208,23 +208,3 @@ int kgdb_arch_handle_exception(int trap, int signo, } return -1; } - -/* KGDB console driver which uses PDC to read chars from keyboard */ - -static void kgdb_pdc_write_char(u8 chr) -{ - /* no need to print char. kgdb will do it. */ -} - -static struct kgdb_io kgdb_pdc_io_ops = { - .name = "kgdb_pdc", - .read_char = pdc_iodc_getc, - .write_char = kgdb_pdc_write_char, -}; - -static int __init kgdb_pdc_init(void) -{ - kgdb_register_io_module(&kgdb_pdc_io_ops); - return 0; -} -early_initcall(kgdb_pdc_init); From 4def68cc15f37287a6b3bb8ccaaaba2aee6c5185 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 14 Dec 2022 22:17:57 +0100 Subject: [PATCH 151/207] parisc: Drop PMD_SHIFT from calculation in pgtable.h commit fe94cb1a614d2df2764d49ac959d8b7e4cb98e15 upstream. 
PMD_SHIFT isn't defined if CONFIG_PGTABLE_LEVELS == 3, and as such the kernel test robot found this warning: In file included from include/linux/pgtable.h:6, from arch/parisc/kernel/head.S:23: arch/parisc/include/asm/pgtable.h:169:32: warning: "PMD_SHIFT" is not defined, evaluates to 0 [-Wundef] 169 | #if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT) Avoid the warning by using PLD_SHIFT and BITS_PER_PTE. Signed-off-by: Helge Deller Reported-by: kernel test robot Cc: # 6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index ecd028854469..68ae77069d23 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -166,8 +166,8 @@ extern void __update_cache(pte_t pte); /* This calculates the number of initial pages we need for the initial * page tables */ -#if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT) -# define PT_INITIAL (1 << (KERNEL_INITIAL_ORDER - PMD_SHIFT)) +#if (KERNEL_INITIAL_ORDER) >= (PLD_SHIFT + BITS_PER_PTE) +# define PT_INITIAL (1 << (KERNEL_INITIAL_ORDER - PLD_SHIFT - BITS_PER_PTE)) #else # define PT_INITIAL (1) /* all initial PTEs fit into one page */ #endif From 35fe1c238437155153c1aeeb94572b04fa60e0b5 Mon Sep 17 00:00:00 2001 From: Wang Weiyang Date: Tue, 25 Oct 2022 19:31:01 +0800 Subject: [PATCH 152/207] device_cgroup: Roll back to original exceptions after copy failure commit e68bfbd3b3c3a0ec3cf8c230996ad8cabe90322f upstream. When adding the 'a *:* rwm' entry to devcgroup A's whitelist, A's exceptions are first cleaned and A's behavior is changed to DEVCG_DEFAULT_ALLOW. Then the parent's exceptions are copied to A's whitelist. If the copy fails, the function just returns, leaving A granting permissions to all devices, so A may grant more permissions than its parent. Back up A's whitelist and restore the original exceptions after a copy failure.
Cc: stable@vger.kernel.org Fixes: 4cef7299b478 ("device_cgroup: add proper checking when changing default behavior") Signed-off-by: Wang Weiyang Reviewed-by: Aristeu Rozanski Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/device_cgroup.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index a9f8c63a96d1..bef2b9285fb3 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -82,6 +82,17 @@ free_and_exit: return -ENOMEM; } +static void dev_exceptions_move(struct list_head *dest, struct list_head *orig) +{ + struct dev_exception_item *ex, *tmp; + + lockdep_assert_held(&devcgroup_mutex); + + list_for_each_entry_safe(ex, tmp, orig, list) { + list_move_tail(&ex->list, dest); + } +} + /* * called under devcgroup_mutex */ @@ -604,11 +615,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, int count, rc = 0; struct dev_exception_item ex; struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent); + struct dev_cgroup tmp_devcgrp; if (!capable(CAP_SYS_ADMIN)) return -EPERM; memset(&ex, 0, sizeof(ex)); + memset(&tmp_devcgrp, 0, sizeof(tmp_devcgrp)); b = buffer; switch (*b) { @@ -620,15 +633,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, if (!may_allow_all(parent)) return -EPERM; - dev_exception_clean(devcgroup); - devcgroup->behavior = DEVCG_DEFAULT_ALLOW; - if (!parent) + if (!parent) { + devcgroup->behavior = DEVCG_DEFAULT_ALLOW; + dev_exception_clean(devcgroup); break; + } - rc = dev_exceptions_copy(&devcgroup->exceptions, - &parent->exceptions); + INIT_LIST_HEAD(&tmp_devcgrp.exceptions); + rc = dev_exceptions_copy(&tmp_devcgrp.exceptions, + &devcgroup->exceptions); if (rc) return rc; + dev_exception_clean(devcgroup); + rc = dev_exceptions_copy(&devcgroup->exceptions, + &parent->exceptions); + if (rc) { + dev_exceptions_move(&devcgroup->exceptions, + &tmp_devcgrp.exceptions); + 
return rc; + } + devcgroup->behavior = DEVCG_DEFAULT_ALLOW; + dev_exception_clean(&tmp_devcgrp); break; case DEVCG_DENY: if (css_has_online_children(&devcgroup->css)) From d988f0bcf579b4bcb0b7aba217a882ec150bcc2a Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 17 Oct 2022 15:32:01 +0000 Subject: [PATCH 153/207] drm/connector: send hotplug uevent on connector cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6fdc2d490ea1369d17afd7e6eb66fecc5b7209bc upstream. A typical DP-MST unplug removes a KMS connector. However care must be taken to properly synchronize with user-space. The expected sequence of events is the following: 1. The kernel notices that the DP-MST port is gone. 2. The kernel marks the connector as disconnected, then sends a uevent to make user-space re-scan the connector list. 3. User-space notices the connector goes from connected to disconnected, disables it. 4. Kernel handles the IOCTL disabling the connector. On success, the very last reference to the struct drm_connector is dropped and drm_connector_cleanup() is called. 5. The connector is removed from the list, and a uevent is sent to tell user-space that the connector disappeared. The very last step was missing. As a result, user-space thought the connector still existed and could try to disable it again. Since the kernel no longer knows about the connector, that would end up with EINVAL and confused user-space. Fix this by sending a hotplug uevent from drm_connector_cleanup(). 
Signed-off-by: Simon Ser Cc: stable@vger.kernel.org Cc: Daniel Vetter Cc: Lyude Paul Cc: Jonas Ådahl Tested-by: Jonas Ådahl Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20221017153150.60675-2-contact@emersion.fr Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_connector.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 61c29ce74b03..27de2a97f1d1 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -582,6 +582,9 @@ void drm_connector_cleanup(struct drm_connector *connector) mutex_destroy(&connector->mutex); memset(connector, 0, sizeof(*connector)); + + if (dev->registered) + drm_sysfs_hotplug_event(dev); } EXPORT_SYMBOL(drm_connector_cleanup); From 622d527decaac0eb65512acada935a0fdc1d0202 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 25 Oct 2022 23:19:35 -0400 Subject: [PATCH 154/207] drm/vmwgfx: Validate the box size for the snooped cursor commit 4cf949c7fafe21e085a4ee386bb2dade9067316e upstream. Invalid userspace dma surface copies could potentially overflow the memcpy from the surface to the snooped image leading to crashes. To fix it the dimensions of the copybox have to be validated against the expected size of the snooped cursor. 
Signed-off-by: Zack Rusin Fixes: 2ac863719e51 ("vmwgfx: Snoop DMA transfers with non-covering sizes") Cc: stable@vger.kernel.org # v3.2+ Reviewed-by: Michael Banack Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20221026031936.1004280-1-zack@kde.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 214829c32ed8..7a2f262414ad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -308,7 +308,8 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, if (cmd->dma.guest.ptr.offset % PAGE_SIZE || box->x != 0 || box->y != 0 || box->z != 0 || box->srcx != 0 || box->srcy != 0 || box->srcz != 0 || - box->d != 1 || box_count != 1) { + box->d != 1 || box_count != 1 || + box->w > 64 || box->h > 64) { /* TODO handle none page aligned offsets */ /* TODO handle more dst & src != 0 */ /* TODO handle more then one copy */ From 3650c063f22d03795026bd6f3d473e5bbdabb442 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 13 Oct 2022 15:28:10 +0200 Subject: [PATCH 155/207] drm/mgag200: Fix PLL setup for G200_SE_A rev >=4 commit b389286d0234e1edbaf62ed8bc0892a568c33662 upstream. For G200_SE_A, PLL M setting is wrong, which leads to blank screen, or "signal out of range" on VGA display. previous code had "m |= 0x80" which was changed to m |= ((pixpllcn & BIT(8)) >> 1); Tested on G200_SE_A rev 42 This line of code was moved to another file with commit 877507bb954e ("drm/mgag200: Provide per-device callbacks for PIXPLLC") but can be easily backported before this commit.
v2: * put BIT(7) First to respect MSB-to-LSB (Thomas) * Add a comment to explain that this bit must be set (Thomas) Fixes: 2dd040946ecf ("drm/mgag200: Store values (not bits) in struct mgag200_pll_values") Cc: stable@vger.kernel.org Signed-off-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221013132810.521945-1-jfalempe@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mgag200/mgag200_g200se.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_g200se.c b/drivers/gpu/drm/mgag200/mgag200_g200se.c index be389ed91cbd..bd6e573c9a1a 100644 --- a/drivers/gpu/drm/mgag200/mgag200_g200se.c +++ b/drivers/gpu/drm/mgag200/mgag200_g200se.c @@ -284,7 +284,8 @@ static void mgag200_g200se_04_pixpllc_atomic_update(struct drm_crtc *crtc, pixpllcp = pixpllc->p - 1; pixpllcs = pixpllc->s; - xpixpllcm = pixpllcm | ((pixpllcn & BIT(8)) >> 1); + // For G200SE A, BIT(7) should be set unconditionally. + xpixpllcm = BIT(7) | pixpllcm; xpixpllcn = pixpllcn; xpixpllcp = (pixpllcs << 3) | pixpllcp; From 84bcb7d09aaa7e7a6ec00213fd17770dbb6c0ca7 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 14 Jul 2022 12:31:42 +0200 Subject: [PATCH 156/207] drm/etnaviv: move idle mapping reaping into separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5a40837debaa9dcc71765d32ce1a15be068b6cc2 upstream. The same logic is already used in two different places and now it will also be needed outside of the compilation unit, so split it into a separate function. 
Cc: stable@vger.kernel.org # 5.19 Signed-off-by: Lucas Stach Reviewed-by: Guido Günther Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 23 +++++++++++++++-------- drivers/gpu/drm/etnaviv/etnaviv_mmu.h | 1 + 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index dc1aa738c4f1..55479cb8b1ac 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -135,6 +135,19 @@ static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu_context *context, drm_mm_remove_node(&mapping->vram_node); } +void etnaviv_iommu_reap_mapping(struct etnaviv_vram_mapping *mapping) +{ + struct etnaviv_iommu_context *context = mapping->context; + + lockdep_assert_held(&context->lock); + WARN_ON(mapping->use); + + etnaviv_iommu_remove_mapping(context, mapping); + etnaviv_iommu_context_put(mapping->context); + mapping->context = NULL; + list_del_init(&mapping->mmu_node); +} + static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context, struct drm_mm_node *node, size_t size) { @@ -202,10 +215,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context, * this mapping. 
*/ list_for_each_entry_safe(m, n, &list, scan_node) { - etnaviv_iommu_remove_mapping(context, m); - etnaviv_iommu_context_put(m->context); - m->context = NULL; - list_del_init(&m->mmu_node); + etnaviv_iommu_reap_mapping(m); list_del_init(&m->scan_node); } @@ -257,10 +267,7 @@ static int etnaviv_iommu_insert_exact(struct etnaviv_iommu_context *context, } list_for_each_entry_safe(m, n, &scan_list, scan_node) { - etnaviv_iommu_remove_mapping(context, m); - etnaviv_iommu_context_put(m->context); - m->context = NULL; - list_del_init(&m->mmu_node); + etnaviv_iommu_reap_mapping(m); list_del_init(&m->scan_node); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index e4a0b7d09c2e..c01a147f0dfd 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -91,6 +91,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu_context *context, struct etnaviv_vram_mapping *mapping, u64 va); void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context, struct etnaviv_vram_mapping *mapping); +void etnaviv_iommu_reap_mapping(struct etnaviv_vram_mapping *mapping); int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *ctx, struct etnaviv_vram_mapping *mapping, From 556a7e74af59048bf73777b25395cf90be9b377f Mon Sep 17 00:00:00 2001 From: Mikko Kovanen Date: Sat, 26 Nov 2022 13:27:13 +0000 Subject: [PATCH 157/207] drm/i915/dsi: fix VBT send packet port selection for dual link DSI commit f9cdf4130671d767071607d0a7568c9bd36a68d0 upstream. intel_dsi->ports contains bitmask of enabled ports and correspondingly logic for selecting port for VBT packet sending must use port specific bitmask when deciding appropriate port. 
Fixes: 08c59dde71b7 ("drm/i915/dsi: fix VBT send packet port selection for ICL+") Cc: stable@vger.kernel.org Signed-off-by: Mikko Kovanen Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/DBBPR09MB466592B16885D99ABBF2393A91119@DBBPR09MB4665.eurprd09.prod.outlook.com (cherry picked from commit 8d58bb7991c45f6b60710cc04c9498c6ea96db90) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 75e8cc4337c9..fce69fa446d5 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -137,9 +137,9 @@ static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi, return ffs(intel_dsi->ports) - 1; if (seq_port) { - if (intel_dsi->ports & PORT_B) + if (intel_dsi->ports & BIT(PORT_B)) return PORT_B; - else if (intel_dsi->ports & PORT_C) + else if (intel_dsi->ports & BIT(PORT_C)) return PORT_C; } From babd82f10749da6ce3521690759d771ba6652f0e Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Fri, 4 Nov 2022 06:45:12 +0000 Subject: [PATCH 158/207] drm/ingenic: Fix missing platform_driver_unregister() call in ingenic_drm_init() commit 47078311b8efebdefd5b3b2f87e2b02b14f49c66 upstream. A problem about modprobe ingenic-drm failed is triggered with the following log given: [ 303.561088] Error: Driver 'ingenic-ipu' is already registered, aborting... modprobe: ERROR: could not insert 'ingenic_drm': Device or resource busy The reason is that ingenic_drm_init() returns platform_driver_register() directly without checking its return value, if platform_driver_register() failed, it returns without unregistering ingenic_ipu_driver_ptr, resulting the ingenic-drm can never be installed later. 
A simple call graph is shown as below: ingenic_drm_init() platform_driver_register() # ingenic_ipu_driver_ptr are registered platform_driver_register() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without unregister ingenic_ipu_driver_ptr Fixing this problem by checking the return value of platform_driver_register() and do platform_unregister_drivers() if error happened. Fixes: fc1acf317b01 ("drm/ingenic: Add support for the IPU") Signed-off-by: Yuan Can Cc: stable@vger.kernel.org Signed-off-by: Paul Cercueil Link: https://patchwork.freedesktop.org/patch/msgid/20221104064512.8569-1-yuancan@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index ab0515d2c420..4499a04f7c13 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -1629,7 +1629,11 @@ static int ingenic_drm_init(void) return err; } - return platform_driver_register(&ingenic_drm_driver); + err = platform_driver_register(&ingenic_drm_driver); + if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && err) + platform_driver_unregister(ingenic_ipu_driver_ptr); + + return err; } module_init(ingenic_drm_init); From 12cfb0c1c27e8699581388514513a92fabda5379 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 14 Jul 2022 12:31:43 +0200 Subject: [PATCH 159/207] drm/etnaviv: reap idle mapping if it doesn't match the softpin address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 332f847212e43d584019a8264895f25cf92aa647 upstream. When a idle BO, which is held open by another process, gets freed by userspace and subsequently referenced again by e.g. importing it again, userspace may assign a different softpin VA than the last time around. 
As the kernel GEM object still exists, we likely have an idle mapping with the old VA still cached, if it hasn't been reaped in the meantime. As the context matches, we then simply try to resurrect this mapping by increasing the refcount. As the VA in this mapping does not match the new softpin address, we consequently fail the otherwise valid submit. Instead of failing, reap the idle mapping. Cc: stable@vger.kernel.org # 5.19 Signed-off-by: Lucas Stach Reviewed-by: Guido Günther Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index cc386f8a7116..5cf13e52f7c9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -258,7 +258,12 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get( if (mapping->use == 0) { mutex_lock(&mmu_context->lock); if (mapping->context == mmu_context) - mapping->use += 1; + if (va && mapping->iova != va) { + etnaviv_iommu_reap_mapping(mapping); + mapping = NULL; + } else { + mapping->use += 1; + } else mapping = NULL; mutex_unlock(&mmu_context->lock); From b085fb43feede48ebf80ab7e2dd150c8d9902932 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 29 Jun 2022 19:26:46 +0800 Subject: [PATCH 160/207] ext4: silence the warning when evicting inode with dioread_nolock commit bc12ac98ea2e1b70adc6478c8b473a0003b659d3 upstream. When evicting an inode with default dioread_nolock, it could be raced by the unwritten extents converting kworker after writeback some new allocated dirty blocks. It converts unwritten extents to written, the extents could be merged to upper level and free extent blocks, so it could mark the inode dirty again even this inode has been marked I_FREEING. But the inode->i_io_list check and warning in ext4_evict_inode() missing this corner case.
Fortunately, ext4_evict_inode() will wait all extents converting finished before this check, so it will not lead to inode use-after-free problem, every thing is OK besides this warning. The WARN_ON_ONCE was originally designed for finding inode use-after-free issues in advance, but if we add current dioread_nolock case in, it will become not quite useful, so fix this warning by just remove this check. ====== WARNING: CPU: 7 PID: 1092 at fs/ext4/inode.c:227 ext4_evict_inode+0x875/0xc60 ... RIP: 0010:ext4_evict_inode+0x875/0xc60 ... Call Trace: evict+0x11c/0x2b0 iput+0x236/0x3a0 do_unlinkat+0x1b4/0x490 __x64_sys_unlinkat+0x4c/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa933c1115b ====== rm kworker ext4_end_io_end() vfs_unlink() ext4_unlink() ext4_convert_unwritten_io_end_vec() ext4_convert_unwritten_extents() ext4_map_blocks() ext4_ext_map_blocks() ext4_ext_try_to_merge_up() __mark_inode_dirty() check !I_FREEING locked_inode_to_wb_and_lock_list() iput() iput_final() evict() ext4_evict_inode() truncate_inode_pages_final() //wait release io_end inode_io_list_move_locked() ext4_release_io_end() trigger WARN_ON_ONCE() Cc: stable@kernel.org Fixes: ceff86fddae8 ("ext4: Avoid freeing inodes on dirty list") Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220629112647.4141034-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2b5ef1b64249..7c5f5dabe0fd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -222,13 +222,13 @@ void ext4_evict_inode(struct inode *inode) /* * For inodes with journalled data, transaction commit could have - * dirtied the inode. Flush worker is ignoring it because of I_FREEING - * flag but we still need to remove the inode from the writeback lists. + * dirtied the inode. 
And for inodes with dioread_nolock, unwritten + * extents converting worker could merge extents and also have dirtied + * the inode. Flush worker is ignoring it because of I_FREEING flag but + * we still need to remove the inode from the writeback lists. */ - if (!list_empty_careful(&inode->i_io_list)) { - WARN_ON_ONCE(!ext4_should_journal_data(inode)); + if (!list_empty_careful(&inode->i_io_list)) inode_io_list_del(inode); - } /* * Protect us against freezing - iput() caller didn't have to have any From 248feff7deda6d9d7bd633fe8c6e7d20f4c259d7 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 17 Aug 2022 21:27:01 +0800 Subject: [PATCH 161/207] ext4: add inode table check in __ext4_get_inode_loc to avoid possible infinite loop commit eee22187b53611e173161e38f61de1c7ecbeb876 upstream. In do_writepages, if the value returned by ext4_writepages is "-ENOMEM" and "wbc->sync_mode == WB_SYNC_ALL", retry until the condition is not met. In __ext4_get_inode_loc, if the bh returned by sb_getblk is NULL, the function returns -ENOMEM. In __getblk_slow, if the return value of grow_buffers is less than 0, the function returns NULL. When the three processes are connected in series like the following stack, an infinite loop may occur: do_writepages <--- keep retrying ext4_writepages mpage_map_and_submit_extent mpage_map_one_extent ext4_map_blocks ext4_ext_map_blocks ext4_ext_handle_unwritten_extents ext4_ext_convert_to_initialized ext4_split_extent ext4_split_extent_at __ext4_ext_dirty __ext4_mark_inode_dirty ext4_reserve_inode_write ext4_get_inode_loc __ext4_get_inode_loc <--- return -ENOMEM sb_getblk __getblk_gfp __getblk_slow <--- return NULL grow_buffers grow_dev_page <--- return -ENXIO ret = (block < end_block) ? 1 : -ENXIO; In this issue, bg_inode_table_hi is overwritten as an incorrect value. As a result, `block < end_block` cannot be met in grow_dev_page. Therefore, __ext4_get_inode_loc always returns '-ENOMEM' and do_writepages keeps retrying.
As a result, the writeback process is in the D state due to an infinite loop. Add a check on inode table block in the __ext4_get_inode_loc function by referring to ext4_read_inode_bitmap to avoid this infinite loop. Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220817132701.3015912-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7c5f5dabe0fd..e7a66789fc3c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4473,9 +4473,17 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; inode_offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)); - block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); + block = ext4_inode_table(sb, gdp); + if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) || + (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) { + ext4_error(sb, "Invalid inode table block %llu in " + "block_group %u", block, iloc->block_group); + return -EFSCORRUPTED; + } + block += (inode_offset / inodes_per_block); + bh = sb_getblk(sb, block); if (unlikely(!bh)) return -ENOMEM; From 39364b354f0e4f8ba7885bd27124d52141439d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Tue, 11 Oct 2022 16:57:58 +0100 Subject: [PATCH 162/207] ext4: remove trailing newline from ext4_msg() message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 78742d4d056df7d2fad241c90185d281bf924844 upstream. The ext4_msg() function adds a new line to the message. Remove extra '\n' from call to ext4_msg() in ext4_orphan_cleanup(). 
Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20221011155758.15287-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/orphan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 69a9cf9137a6..e5b47dda3317 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -412,7 +412,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es) /* don't clear list on RO mount w/ errors */ if (es->s_last_orphan && !(s_flags & SB_RDONLY)) { ext4_msg(sb, KERN_INFO, "Errors on filesystem, " - "clearing orphan list.\n"); + "clearing orphan list."); es->s_last_orphan = 0; } ext4_debug("Skipping orphan recovery on fs with errors.\n"); From 35840a486cffd5bffa3105de382315dfc6182382 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 9 Nov 2022 15:43:43 +0800 Subject: [PATCH 163/207] ext4: correct inconsistent error msg in nojournal mode commit 89481b5fa8c0640e62ba84c6020cee895f7ac643 upstream. When we used the journal_async_commit mounting option in nojournal mode, the kernel told me that "can't mount with journal_checksum", was very confusing. I find that when we mount with journal_async_commit, both the JOURNAL_ASYNC_COMMIT and EXPLICIT_JOURNAL_CHECKSUM flags are set. However, in the error branch, CHECKSUM is checked before ASYNC_COMMIT. As a result, the above inconsistency occurs, and the ASYNC_COMMIT branch becomes dead code that cannot be executed. Therefore, we exchange the positions of the two judgments to make the error msg more accurate. 
Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221109074343.4184862-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7cdd2138c897..9ccb7799f0f7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5287,16 +5287,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount3a; } else { /* Nojournal mode, all journal mount options are illegal */ - if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "journal_checksum, fs mounted w/o journal"); - goto failed_mount3a; - } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ext4_msg(sb, KERN_ERR, "can't mount with " "journal_async_commit, fs mounted w/o journal"); goto failed_mount3a; } + + if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_checksum, fs mounted w/o journal"); + goto failed_mount3a; + } if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { ext4_msg(sb, KERN_ERR, "can't mount with " "commit=%lu, fs mounted w/o journal", From a73f2b2e385390b44d3503ab3464c25140b36b1b Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 21 Nov 2022 12:21:30 +0100 Subject: [PATCH 164/207] fs: ext4: initialize fsdata in pagecache_write() commit 956510c0c7439e90b8103aaeaf4da92878c622f0 upstream. When aops->write_begin() does not initialize fsdata, KMSAN reports an error passing the latter to aops->write_end(). Fix this by unconditionally initializing fsdata. 
Cc: Eric Biggers Fixes: c93d8f885809 ("ext4: add basic fs-verity support") Reported-by: syzbot+9767be679ef5016b6082@syzkaller.appspotmail.com Signed-off-by: Alexander Potapenko Reviewed-by: Eric Biggers Link: https://lore.kernel.org/r/20221121112134.407362-1-glider@google.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/verity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 3c640bd7ecae..30e3b65798b5 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -79,7 +79,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, size_t n = min_t(size_t, count, PAGE_SIZE - offset_in_page(pos)); struct page *page; - void *fsdata; + void *fsdata = NULL; int res; res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata); From 7908b8a541b1578cc61b4da7f19b604a931441da Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 2 Nov 2022 16:06:33 +0800 Subject: [PATCH 165/207] ext4: fix use-after-free in ext4_orphan_cleanup commit a71248b1accb2b42e4980afef4fa4a27fa0e36f5 upstream. I caught an issue as follows: ================================================================== BUG: KASAN: use-after-free in __list_add_valid+0x28/0x1a0 Read of size 8 at addr ffff88814b13f378 by task mount/710 CPU: 1 PID: 710 Comm: mount Not tainted 6.1.0-rc3-next #370 Call Trace: dump_stack_lvl+0x73/0x9f print_report+0x25d/0x759 kasan_report+0xc0/0x120 __asan_load8+0x99/0x140 __list_add_valid+0x28/0x1a0 ext4_orphan_cleanup+0x564/0x9d0 [ext4] __ext4_fill_super+0x48e2/0x5300 [ext4] ext4_fill_super+0x19f/0x3a0 [ext4] get_tree_bdev+0x27b/0x450 ext4_get_tree+0x19/0x30 [ext4] vfs_get_tree+0x49/0x150 path_mount+0xaae/0x1350 do_mount+0xe2/0x110 __x64_sys_mount+0xf0/0x190 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...]
================================================================== Above issue may happen as follows: ------------------------------------- ext4_fill_super ext4_orphan_cleanup --- loop1: assume last_orphan is 12 --- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan) ext4_truncate --> return 0 ext4_inode_attach_jinode --> return -ENOMEM iput(inode) --> free inode<12> --- loop2: last_orphan is still 12 --- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); // use inode<12> and trigger UAF To solve this issue, we need to propagate the return value of ext4_inode_attach_jinode() appropriately. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221102080633.1630225-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e7a66789fc3c..bd989e61333a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4225,7 +4225,8 @@ int ext4_truncate(struct inode *inode) /* If we zero-out tail of the page, we have to create jinode for jbd2 */ if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { - if (ext4_inode_attach_jinode(inode) < 0) + err = ext4_inode_attach_jinode(inode); + if (err) goto out_trace; } From 7753d6657873a2523a9989e6c09090cd503bbcda Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 13:58:33 +0800 Subject: [PATCH 166/207] ext4: fix undefined behavior in bit shift for ext4_check_flag_values commit 3bf678a0f9c017c9ba7c581541dbc8453452a7ae upstream. Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. 
The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in fs/ext4/ext4.h:591:2 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c ext4_init_fs+0x5a/0x277 do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Fixes: 9a4c80194713 ("ext4: ensure Inode flags consistency are checked at build time") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221031055833.3966222-1-cuigaosheng1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8d5453852f98..e2d12e6c998d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -558,7 +558,7 @@ enum { * * It's not paranoia if the Murphy's Law really *is* out to get you. :-) */ -#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) +#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG)) #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG)) static inline void ext4_check_flag_values(void) From 488a5c2bf7543c3cd3f07a025f2e62be91599430 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 26 Oct 2022 12:23:09 +0800 Subject: [PATCH 167/207] ext4: add EXT4_IGET_BAD flag to prevent unexpected bad inode commit 63b1e9bccb71fe7d7e3ddc9877dbdc85e5d2d023 upstream. There are many places that will get unhappy (and crash) when ext4_iget() returns a bad inode. However, if iget the boot loader inode, allows a bad inode to be returned, because the inode may not be initialized. This mechanism can be used to bypass some checks and cause panic. To solve this problem, we add a special iget flag EXT4_IGET_BAD. 
Only with this flag we'd be returning bad inode from ext4_iget(), otherwise we always return the error code if the inode is bad inode.(suggested by Jan Kara) Signed-off-by: Baokun Li Reviewed-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221026042310.3839669-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 ++- fs/ext4/inode.c | 8 +++++++- fs/ext4/ioctl.c | 3 ++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e2d12e6c998d..3afdd99bb214 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2964,7 +2964,8 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode, typedef enum { EXT4_IGET_NORMAL = 0, EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */ - EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */ + EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */ + EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */ } ext4_iget_flags; extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bd989e61333a..f53c67909af5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5053,8 +5053,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) ext4_error_inode(inode, function, line, 0, "casefold flag without casefold feature"); - brelse(iloc.bh); + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) { + ext4_error_inode(inode, function, line, 0, + "bad inode without EXT4_IGET_BAD flag"); + ret = -EUCLEAN; + goto bad_inode; + } + brelse(iloc.bh); unlock_new_inode(inode); return inode; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 95dfea28bf4e..9ed7b9fe2132 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -374,7 +374,8 @@ static long swap_inode_boot_loader(struct super_block *sb, blkcnt_t blocks; unsigned short bytes; - 
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL); + inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, + EXT4_IGET_SPECIAL | EXT4_IGET_BAD); if (IS_ERR(inode_bl)) return PTR_ERR(inode_bl); ei_bl = EXT4_I(inode_bl); From 7720e1e43315bc9795dbaae51a9d776a982b5d3b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 26 Oct 2022 12:23:08 +0800 Subject: [PATCH 168/207] ext4: add helper to check quota inums commit 07342ec259df2a35d6a34aebce010567a80a0e15 upstream. Before quota is enabled, a check on the preset quota inums in ext4_super_block is added to prevent wrong quota inodes from being loaded. In addition, when the quota fails to be enabled, the quota type and quota inum are printed to facilitate fault locating. Signed-off-by: Baokun Li Reviewed-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221026042310.3839669-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ccb7799f0f7..a83810cd9041 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6887,6 +6887,20 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, return err; } +static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum) +{ + switch (type) { + case USRQUOTA: + return qf_inum == EXT4_USR_QUOTA_INO; + case GRPQUOTA: + return qf_inum == EXT4_GRP_QUOTA_INO; + case PRJQUOTA: + return qf_inum >= EXT4_GOOD_OLD_FIRST_INO; + default: + BUG(); + } +} + static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags) { @@ -6903,9 +6917,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, if (!qf_inums[type]) return -EPERM; + if (!ext4_check_quota_inum(type, qf_inums[type])) { + ext4_error(sb, "Bad quota inum: %lu, type: %d", + qf_inums[type], type); + return -EUCLEAN; + } 
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL); if (IS_ERR(qf_inode)) { - ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); + ext4_error(sb, "Bad quota inode: %lu, type: %d", + qf_inums[type], type); return PTR_ERR(qf_inode); } @@ -6944,8 +6965,9 @@ int ext4_enable_quotas(struct super_block *sb) if (err) { ext4_warning(sb, "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "e2fsck to fix.", type, err); + "(type=%d, err=%d, ino=%lu). " + "Please run e2fsck to fix.", type, + err, qf_inums[type]); for (type--; type >= 0; type--) { struct inode *inode; From 1daff79463d7d76096c84c57cddc30c5d4be2226 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 26 Oct 2022 12:23:07 +0800 Subject: [PATCH 169/207] ext4: fix bug_on in __es_tree_search caused by bad quota inode commit d323877484765aaacbb2769b06e355c2041ed115 upstream. We got an issue as follows: ================================================================== kernel BUG at fs/ext4/extents_status.c:202!
invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 810 Comm: mount Not tainted 6.1.0-rc1-next-g9631525255e3 #352 RIP: 0010:__es_tree_search.isra.0+0xb8/0xe0 RSP: 0018:ffffc90001227900 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 0000000077512a0f RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000002a10 RDI: ffff8881004cd0c8 RBP: ffff888177512ac8 R08: 47ffffffffffffff R09: 0000000000000001 R10: 0000000000000001 R11: 00000000000679af R12: 0000000000002a10 R13: ffff888177512d88 R14: 0000000077512a10 R15: 0000000000000000 FS: 00007f4bd76dbc40(0000)GS:ffff88842fd00000(0000)knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005653bf993cf8 CR3: 000000017bfdf000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ext4_es_cache_extent+0xe2/0x210 ext4_cache_extents+0xd2/0x110 ext4_find_extent+0x5d5/0x8c0 ext4_ext_map_blocks+0x9c/0x1d30 ext4_map_blocks+0x431/0xa50 ext4_getblk+0x82/0x340 ext4_bread+0x14/0x110 ext4_quota_read+0xf0/0x180 v2_read_header+0x24/0x90 v2_check_quota_file+0x2f/0xa0 dquot_load_quota_sb+0x26c/0x760 dquot_load_quota_inode+0xa5/0x190 ext4_enable_quotas+0x14c/0x300 __ext4_fill_super+0x31cc/0x32c0 ext4_fill_super+0x115/0x2d0 get_tree_bdev+0x1d2/0x360 ext4_get_tree+0x19/0x30 vfs_get_tree+0x26/0xe0 path_mount+0x81d/0xfc0 do_mount+0x8d/0xc0 __x64_sys_mount+0xc0/0x160 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd ================================================================== Above issue may happen as follows: ------------------------------------- ext4_fill_super ext4_orphan_cleanup ext4_enable_quotas ext4_quota_enable ext4_iget --> get error inode <5> ext4_ext_check_inode --> Wrong imode makes it escape inspection make_bad_inode(inode) --> EXT4_BOOT_LOADER_INO set imode dquot_load_quota_inode vfs_setup_quota_inode --> check pass dquot_load_quota_sb v2_check_quota_file v2_read_header 
ext4_quota_read ext4_bread ext4_getblk ext4_map_blocks ext4_ext_map_blocks ext4_find_extent ext4_cache_extents ext4_es_cache_extent __es_tree_search.isra.0 ext4_es_end --> Wrong extents trigger BUG_ON In the above issue, s_usr_quota_inum is set to 5, but inode<5> contains incorrect imode and disordered extents. Because 5 is EXT4_BOOT_LOADER_INO, the ext4_ext_check_inode check in the ext4_iget function can be bypassed, finally, the extents that are not checked trigger the BUG_ON in the __es_tree_search function. To solve this issue, check whether the inode is bad_inode in vfs_setup_quota_inode(). Signed-off-by: Baokun Li Reviewed-by: Chaitanya Kulkarni Reviewed-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221026042310.3839669-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 0427b44bfee5..f27faf5db554 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2324,6 +2324,8 @@ static int vfs_setup_quota_inode(struct inode *inode, int type) struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); + if (is_bad_inode(inode)) + return -EUCLEAN; if (!S_ISREG(inode->i_mode)) return -EACCES; if (IS_RDONLY(inode)) From 5e1d519af52fca8d6073b1f2430d2de95969e8cf Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 8 Dec 2022 11:34:24 +0800 Subject: [PATCH 170/207] ext4: fix reserved cluster accounting in __es_remove_extent() commit 1da18e38cb97e9521e93d63034521a9649524f64 upstream. When bigalloc is enabled, reserved cluster accounting for delayed allocation is handled in extent_status.c. 
With a corrupted file system, it's possible for this accounting to be incorrect, discovered by Syzbot: EXT4-fs error (device loop0): ext4_validate_block_bitmap:398: comm rep: bg 0: block 5: invalid block bitmap EXT4-fs (loop0): Delayed block allocation failed for inode 18 at logical offset 0 with max blocks 32 with error 28 EXT4-fs (loop0): This should not happen!! Data will be lost EXT4-fs (loop0): Total free blocks count 0 EXT4-fs (loop0): Free/Dirty block details EXT4-fs (loop0): free_blocks=0 EXT4-fs (loop0): dirty_blocks=32 EXT4-fs (loop0): Block reservation details EXT4-fs (loop0): i_reserved_data_blocks=2 EXT4-fs (loop0): Inode 18 (00000000845cd634): i_reserved_data_blocks (1) not cleared! The above issue happens as follows: Assume: sbi->s_cluster_ratio = 16 Step1: Insert delay block [0, 31] -> ei->i_reserved_data_blocks=2 Step2: ext4_writepages mpage_map_and_submit_extent -> return failed mpage_release_unused_pages -> to release [0, 30] ext4_es_remove_extent -> remove lblk=0 end=30 __es_remove_extent -> len1=0 len2=31-30=1 __es_remove_extent: ... if (len2 > 0) { ... if (len1 > 0) { ... } else { es->es_lblk = end + 1; es->es_len = len2; ... } if (count_reserved) count_rsvd(inode, lblk, ...); goto out; -> will return but didn't calculate 'reserved' ... Step3: ext4_destroy_inode -> trigger "i_reserved_data_blocks (1) not cleared!" To solve the above issue, if 'len2>0', call 'get_rsvd()' before 'goto out'.
Reported-by: syzbot+05a0f0ccab4a25626e38@syzkaller.appspotmail.com Fixes: 8fcc3a580651 ("ext4: rework reserved cluster accounting when invalidating pages") Signed-off-by: Ye Bin Reviewed-by: Eric Whitney Link: https://lore.kernel.org/r/20221208033426.1832460-2-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents_status.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index cd0a861853e3..7ada374ff27d 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1371,7 +1371,7 @@ retry: if (count_reserved) count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, &orig_es, &rc); - goto out; + goto out_get_reserved; } if (len1 > 0) { @@ -1413,6 +1413,7 @@ retry: } } +out_get_reserved: if (count_reserved) *reserved = get_rsvd(inode, end, es, &rc); out: From 15adfbb2d161086b508b9ce1f91ce81403f265b1 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 4 Oct 2022 15:58:03 +0200 Subject: [PATCH 171/207] ext4: journal_path mount options should follow links commit e3ea75ee651daf5e434afbfdb7dbf75e200ea1f6 upstream. Before the commit 461c3af045d3 ("ext4: Change handle_mount_opt() to use fs_parameter") ext4 mount option journal_path did follow links in the provided path. Bring this behavior back by allowing to pass pathwalk flags to fs_lookup_param(). Fixes: 461c3af045d3 ("ext4: Change handle_mount_opt() to use fs_parameter") Signed-off-by: Lukas Czerner Reviewed-by: Darrick J. 
Wong Link: https://lore.kernel.org/r/20221004135803.32283-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/mount_api.rst | 1 + fs/ext4/super.c | 2 +- fs/fs_parser.c | 3 ++- include/linux/fs_parser.h | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst index eb358a00be27..1d16787a00e9 100644 --- a/Documentation/filesystems/mount_api.rst +++ b/Documentation/filesystems/mount_api.rst @@ -814,6 +814,7 @@ process the parameters it is given. int fs_lookup_param(struct fs_context *fc, struct fs_parameter *value, bool want_bdev, + unsigned int flags, struct path *_path); This takes a parameter that carries a string or filename type and attempts diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a83810cd9041..9c70da093146 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2247,7 +2247,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } - error = fs_lookup_param(fc, param, 1, &path); + error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path); if (error) { ext4_msg(NULL, KERN_ERR, "error: could not find " "journal device path"); diff --git a/fs/fs_parser.c b/fs/fs_parser.c index ed40ce5742fd..edb3712dcfa5 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -138,15 +138,16 @@ EXPORT_SYMBOL(__fs_parse); * @fc: The filesystem context to log errors through. * @param: The parameter. 
* @want_bdev: T if want a blockdev + * @flags: Pathwalk flags passed to filename_lookup() * @_path: The result of the lookup */ int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, + unsigned int flags, struct path *_path) { struct filename *f; - unsigned int flags = 0; bool put_f; int ret; diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index f103c91139d4..01542c4b87a2 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -76,6 +76,7 @@ static inline int fs_parse(struct fs_context *fc, extern int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, + unsigned int flags, struct path *_path); extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); From f1ec687ebd1bf146333955b7e209d21508c3ba9f Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 29 Jun 2022 19:26:47 +0800 Subject: [PATCH 172/207] ext4: check and assert if marking an no_delete evicting inode dirty commit 318cdc822c63b6e2befcfdc2088378ae6fa18def upstream. In ext4_evict_inode(), if we are evicting an inode in the 'no_delete' path, it cannot be raced by another mark_inode_dirty(). If it happens, someone else may accidentally dirty it without holding inode refcount and probably cause use-after-free issues in the writeback procedure. This is hard to discover and hard to debug, so add a WARN_ON_ONCE() to check and detect this issue in advance.
Suggested-by: Jan Kara Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220629112647.4141034-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f53c67909af5..181bc161b1ac 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -335,6 +335,12 @@ stop_handle: ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: + /* + * Check out some where else accidentally dirty the evicting inode, + * which may probably cause inode use-after-free issues later. + */ + WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list)); + if (!list_empty(&EXT4_I(inode)->i_fc_list)) ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ From a125c8806b7d3c3815b6f9f59d395b9d7527b0ef Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 26 Oct 2022 12:23:10 +0800 Subject: [PATCH 173/207] ext4: fix bug_on in __es_tree_search caused by bad boot loader inode commit 991ed014de0840c5dc405b679168924afb2952ac upstream. We got an issue as follows: ================================================================== kernel BUG at fs/ext4/extents_status.c:203!
invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 945 Comm: cat Not tainted 6.0.0-next-20221007-dirty #349 RIP: 0010:ext4_es_end.isra.0+0x34/0x42 RSP: 0018:ffffc9000143b768 EFLAGS: 00010203 RAX: 0000000000000000 RBX: ffff8881769cd0b8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8fc27cf7 RDI: 00000000ffffffff RBP: ffff8881769cd0bc R08: 0000000000000000 R09: ffffc9000143b5f8 R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881769cd0a0 R13: ffff8881768e5668 R14: 00000000768e52f0 R15: 0000000000000000 FS: 00007f359f7f05c0(0000)GS:ffff88842fd00000(0000)knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f359f5a2000 CR3: 000000017130c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __es_tree_search.isra.0+0x6d/0xf5 ext4_es_cache_extent+0xfa/0x230 ext4_cache_extents+0xd2/0x110 ext4_find_extent+0x5d5/0x8c0 ext4_ext_map_blocks+0x9c/0x1d30 ext4_map_blocks+0x431/0xa50 ext4_mpage_readpages+0x48e/0xe40 ext4_readahead+0x47/0x50 read_pages+0x82/0x530 page_cache_ra_unbounded+0x199/0x2a0 do_page_cache_ra+0x47/0x70 page_cache_ra_order+0x242/0x400 ondemand_readahead+0x1e8/0x4b0 page_cache_sync_ra+0xf4/0x110 filemap_get_pages+0x131/0xb20 filemap_read+0xda/0x4b0 generic_file_read_iter+0x13a/0x250 ext4_file_read_iter+0x59/0x1d0 vfs_read+0x28f/0x460 ksys_read+0x73/0x160 __x64_sys_read+0x1e/0x30 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd ================================================================== In the above issue, ioctl invokes the swap_inode_boot_loader function to swap inode<5> and inode<12>. However, inode<5> contains incorrect imode and disordered extents, and i_nlink is set to 1. The extents check for inode in the ext4_iget function can be bypassed because 5 is EXT4_BOOT_LOADER_INO. While links_count is set to 1, the extents are not initialized in swap_inode_boot_loader.
After the ioctl command is executed successfully, the extents are swapped to inode<12>. In this case, running the `cat` command to view inode<12> triggers the BUG_ON due to the incorrect extents. When the boot loader inode is not initialized, its imode can be one of the following: 1) the imode is a bad type, which is marked as bad_inode in ext4_iget and set to S_IFREG. 2) the imode is a good type but not S_IFREG. 3) the imode is S_IFREG. The BUG_ON may be triggered by bypassing the check in cases 1 and 2. Therefore, when the boot loader inode is bad_inode or its imode is not S_IFREG, initialize the inode to avoid triggering the BUG. Signed-off-by: Baokun Li Reviewed-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221026042310.3839669-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9ed7b9fe2132..e5f60057db5b 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -425,7 +425,7 @@ static long swap_inode_boot_loader(struct super_block *sb, /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); - if (inode_bl->i_nlink == 0) { + if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) { /* this inode has never been used as a BOOT_LOADER */ set_nlink(inode_bl, 1); i_uid_write(inode_bl, 0); From bcc5057e1781a3ee889225480d995c3b5cbde555 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Nov 2022 22:33:12 -0700 Subject: [PATCH 174/207] ext4: don't allow journal inode to have encrypt flag commit 105c78e12468413e426625831faa7db4284e1fec upstream. Mounting a filesystem whose journal inode has the encrypt flag causes a NULL dereference in fscrypt_limit_io_blocks() when the 'inlinecrypt' mount option is used.
The problem is that when jbd2_journal_init_inode() calls bmap(), it eventually finds its way into ext4_iomap_begin(), which calls fscrypt_limit_io_blocks(). fscrypt_limit_io_blocks() requires that if the inode is encrypted, then its encryption key must already be set up. That's not the case here, since the journal inode is never "opened" like a normal file would be. Hence the crash. A reproducer is: mkfs.ext4 -F /dev/vdb debugfs -w /dev/vdb -R "set_inode_field <8> flags 0x80808" mount /dev/vdb /mnt -o inlinecrypt To fix this, make ext4 consider journal inodes with the encrypt flag to be invalid. (Note, maybe other flags should be rejected on the journal inode too. For now, this is just the minimal fix for the above issue.) I've marked this as fixing the commit that introduced the call to fscrypt_limit_io_blocks(), since that's what made an actual crash start being possible. But this fix could be applied to any version of ext4 that supports the encrypt feature. Reported-by: syzbot+ba9dac45bc76c490b7c3@syzkaller.appspotmail.com Fixes: 38ea50daa7a4 ("ext4: support direct I/O with fscrypt using blk-crypto") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221102053312.189962-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9c70da093146..0acfcc5dbf50 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5724,7 +5724,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, ext4_debug("Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); - if (!S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) { ext4_msg(sb, KERN_ERR, "invalid journal inode"); iput(journal_inode); return NULL; From 08d5c8445d15a4b0692dc23707550ad55ddf5ba3 Mon Sep 17 00:00:00 2001 From: 
Eric Biggers Date: Sun, 6 Nov 2022 14:48:35 -0800 Subject: [PATCH 175/207] ext4: disable fast-commit of encrypted dir operations commit 0fbcb5251fc81b58969b272c4fb7374a7b922e3e upstream. fast-commit of create, link, and unlink operations in encrypted directories is completely broken because the unencrypted filenames are being written to the fast-commit journal instead of the encrypted filenames. These operations can't be replayed, as encryption keys aren't present at journal replay time. It is also an information leak. Until if/when we can get this working properly, make encrypted directory operations ineligible for fast-commit. Note that fast-commit operations on encrypted regular files continue to be allowed, as they seem to work. Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") Cc: # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-2-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 41 ++++++++++++++++++++++--------------- fs/ext4/fast_commit.h | 1 + include/trace/events/ext4.h | 7 +++++-- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0f6d0a80467d..6d98f2b39b77 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -420,25 +420,34 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) struct __track_dentry_update_args *dentry_update = (struct __track_dentry_update_args *)arg; struct dentry *dentry = dentry_update->dentry; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct inode *dir = dentry->d_parent->d_inode; + struct super_block *sb = inode->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); mutex_unlock(&ei->i_fc_lock); + + if (IS_ENCRYPTED(dir)) { + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, + NULL); + mutex_lock(&ei->i_fc_lock); + return -EOPNOTSUPP; + } + node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if 
(!node) { - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } node->fcd_op = dentry_update->op; - node->fcd_parent = dentry->d_parent->d_inode->i_ino; + node->fcd_parent = dir->i_ino; node->fcd_ino = inode->i_ino; if (dentry->d_name.len > DNAME_INLINE_LEN) { node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); - ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_NOMEM, NULL); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -2249,17 +2258,17 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal) journal->j_fc_cleanup_callback = ext4_fc_cleanup; } -static const char *fc_ineligible_reasons[] = { - "Extended attributes changed", - "Cross rename", - "Journal flag changed", - "Insufficient memory", - "Swap boot", - "Resize", - "Dir renamed", - "Falloc range op", - "Data journalling", - "FC Commit Failed" +static const char * const fc_ineligible_reasons[] = { + [EXT4_FC_REASON_XATTR] = "Extended attributes changed", + [EXT4_FC_REASON_CROSS_RENAME] = "Cross rename", + [EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed", + [EXT4_FC_REASON_NOMEM] = "Insufficient memory", + [EXT4_FC_REASON_SWAP_BOOT] = "Swap boot", + [EXT4_FC_REASON_RESIZE] = "Resize", + [EXT4_FC_REASON_RENAME_DIR] = "Dir renamed", + [EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op", + [EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling", + [EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename", }; int ext4_fc_info_show(struct seq_file *seq, void *v) diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index a6154c3ed135..256f2ad27204 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -96,6 +96,7 @@ enum { EXT4_FC_REASON_RENAME_DIR, EXT4_FC_REASON_FALLOC_RANGE, EXT4_FC_REASON_INODE_JOURNAL_DATA, + 
EXT4_FC_REASON_ENCRYPTED_FILENAME, EXT4_FC_REASON_MAX }; diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 229e8fae66a3..ced95fec3367 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -104,6 +104,7 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_ENCRYPTED_FILENAME); TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); #define show_fc_reason(reason) \ @@ -116,7 +117,8 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ - { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}) + { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \ + { EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), @@ -2764,7 +2766,7 @@ TRACE_EVENT(ext4_fc_stats, ), TP_printk("dev %d,%d fc ineligible reasons:\n" - "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u " + "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u" "num_commits:%lu, ineligible: %lu, numblks: %lu", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), @@ -2776,6 +2778,7 @@ TRACE_EVENT(ext4_fc_stats, FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), + FC_REASON_NAME_STAT(EXT4_FC_REASON_ENCRYPTED_FILENAME), __entry->fc_commits, __entry->fc_ineligible_commits, __entry->fc_numblks) ); From 7c1fb65e8ce85c281d2cba9c236f9edbbc4eaca6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Nov 2022 14:48:37 -0800 Subject: [PATCH 176/207] ext4: fix leaking uninitialized memory in fast-commit journal commit 
594bc43b410316d70bb42aeff168837888d96810 upstream. When space at the end of fast-commit journal blocks is unused, make sure to zero it out so that uninitialized memory is not leaked to disk. Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") Cc: # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-4-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6d98f2b39b77..bec5bc514dcd 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -737,6 +737,9 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) *crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN); if (pad_len > 0) ext4_fc_memzero(sb, tl + 1, pad_len, crc); + /* Don't leak uninitialized memory in the unused last byte. */ + *((u8 *)(tl + 1) + pad_len) = 0; + ext4_fc_submit_bh(sb, false); ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); @@ -793,6 +796,8 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) dst += sizeof(tail.fc_tid); tail.fc_crc = cpu_to_le32(crc); ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); + dst += sizeof(tail.fc_crc); + memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. */ ext4_fc_submit_bh(sb, true); From 1ba993208bcfd691e241483420a2a761d3f15750 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Nov 2022 14:48:36 -0800 Subject: [PATCH 177/207] ext4: don't set up encryption key during jbd2 transaction commit 4c0d5778385cb3618ff26a561ce41de2b7d9de70 upstream. Commit a80f7fcf1867 ("ext4: fixup ext4_fc_track_* functions' signature") extended the scope of the transaction in ext4_unlink() too far, making it include the call to ext4_find_entry(). However, ext4_find_entry() can deadlock when called from within a transaction because it may need to set up the directory's encryption key. 
Fix this by restoring the transaction to its original scope. Reported-by: syzbot+1a748d0007eeac3ab079@syzkaller.appspotmail.com Fixes: a80f7fcf1867 ("ext4: fixup ext4_fc_track_* functions' signature") Cc: # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-3-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 4 ++-- fs/ext4/fast_commit.c | 2 +- fs/ext4/namei.c | 44 +++++++++++++++++++++++-------------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3afdd99bb214..4e739902dc03 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3620,8 +3620,8 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh, unsigned int blocksize); extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, struct buffer_head *bh); -extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, - struct inode *inode); +extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name, + struct inode *inode, struct dentry *dentry); extern int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index bec5bc514dcd..1e8be0554239 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1402,7 +1402,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, return 0; } - ret = __ext4_unlink(NULL, old_parent, &entry, inode); + ret = __ext4_unlink(old_parent, &entry, inode, NULL); /* -ENOENT ok coz it might not exist anymore. 
*/ if (ret == -ENOENT) ret = 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index c08c0aba1883..a789ea9b61a0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3204,14 +3204,20 @@ end_rmdir: return retval; } -int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, - struct inode *inode) +int __ext4_unlink(struct inode *dir, const struct qstr *d_name, + struct inode *inode, + struct dentry *dentry /* NULL during fast_commit recovery */) { int retval = -ENOENT; struct buffer_head *bh; struct ext4_dir_entry_2 *de; + handle_t *handle; int skip_remove_dentry = 0; + /* + * Keep this outside the transaction; it may have to set up the + * directory's encryption key, which isn't GFP_NOFS-safe. + */ bh = ext4_find_entry(dir, d_name, &de, NULL); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -3228,7 +3234,14 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) skip_remove_dentry = 1; else - goto out; + goto out_bh; + } + + handle = ext4_journal_start(dir, EXT4_HT_DIR, + EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); + goto out_bh; } if (IS_DIRSYNC(dir)) @@ -3237,12 +3250,12 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name if (!skip_remove_dentry) { retval = ext4_delete_entry(handle, dir, de, bh); if (retval) - goto out; + goto out_handle; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) - goto out; + goto out_handle; } else { retval = 0; } @@ -3255,15 +3268,17 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name ext4_orphan_add(handle, inode); inode->i_ctime = current_time(inode); retval = ext4_mark_inode_dirty(handle, inode); - -out: + if (dentry && !retval) + ext4_fc_track_unlink(handle, dentry); +out_handle: + ext4_journal_stop(handle); +out_bh: brelse(bh); return retval; } 
static int ext4_unlink(struct inode *dir, struct dentry *dentry) { - handle_t *handle; int retval; if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) @@ -3281,16 +3296,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (retval) goto out_trace; - handle = ext4_journal_start(dir, EXT4_HT_DIR, - EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) { - retval = PTR_ERR(handle); - goto out_trace; - } - - retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); - if (!retval) - ext4_fc_track_unlink(handle, dentry); + retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry); #if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid @@ -3301,8 +3307,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_CASEFOLDED(dir)) d_invalidate(dentry); #endif - if (handle) - ext4_journal_stop(handle); out_trace: trace_ext4_unlink_exit(dentry, retval); From d1c97077fa603cc52ca8746a46ab13991f5eb99c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Nov 2022 14:48:38 -0800 Subject: [PATCH 178/207] ext4: add missing validation of fast-commit record lengths commit 64b4a25c3de81a69724e888ec2db3533b43816e2 upstream. Validate the inode and filename lengths in fast-commit journal records so that a malicious fast-commit journal cannot cause a crash by having invalid values for these. Also validate EXT4_FC_TAG_DEL_RANGE. 
Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-5-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 38 +++++++++++++++++++------------------- fs/ext4/fast_commit.h | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 1e8be0554239..d5ad4b2b235d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1991,32 +1991,31 @@ void ext4_fc_replay_cleanup(struct super_block *sb) kfree(sbi->s_fc_replay_state.fc_modified_inodes); } -static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl, - u8 *val, u8 *end) +static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi, + int tag, int len) { - if (val + tl->fc_len > end) - return false; - - /* Here only check ADD_RANGE/TAIL/HEAD which will read data when do - * journal rescan before do CRC check. Other tags length check will - * rely on CRC check. 
- */ - switch (tl->fc_tag) { + switch (tag) { case EXT4_FC_TAG_ADD_RANGE: - return (sizeof(struct ext4_fc_add_range) == tl->fc_len); - case EXT4_FC_TAG_TAIL: - return (sizeof(struct ext4_fc_tail) <= tl->fc_len); - case EXT4_FC_TAG_HEAD: - return (sizeof(struct ext4_fc_head) == tl->fc_len); + return len == sizeof(struct ext4_fc_add_range); case EXT4_FC_TAG_DEL_RANGE: + return len == sizeof(struct ext4_fc_del_range); + case EXT4_FC_TAG_CREAT: case EXT4_FC_TAG_LINK: case EXT4_FC_TAG_UNLINK: - case EXT4_FC_TAG_CREAT: + len -= sizeof(struct ext4_fc_dentry_info); + return len >= 1 && len <= EXT4_NAME_LEN; case EXT4_FC_TAG_INODE: + len -= sizeof(struct ext4_fc_inode); + return len >= EXT4_GOOD_OLD_INODE_SIZE && + len <= sbi->s_inode_size; case EXT4_FC_TAG_PAD: - default: - return true; + return true; /* padding can have any length */ + case EXT4_FC_TAG_TAIL: + return len >= sizeof(struct ext4_fc_tail); + case EXT4_FC_TAG_HEAD: + return len == sizeof(struct ext4_fc_head); } + return false; } /* @@ -2079,7 +2078,8 @@ static int ext4_fc_replay_scan(journal_t *journal, cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; - if (!ext4_fc_tag_len_isvalid(&tl, val, end)) { + if (tl.fc_len > end - val || + !ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) { ret = state->fc_replay_num_tags ? JBD2_FC_REPLAY_STOP : -ECANCELED; goto out_err; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 256f2ad27204..2fadb2c4780c 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -58,7 +58,7 @@ struct ext4_fc_dentry_info { __u8 fc_dname[]; }; -/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */ +/* Value structure for EXT4_FC_TAG_INODE. 
*/ struct ext4_fc_inode { __le32 fc_ino; __u8 fc_raw_inode[]; From 18e66ed75d87bcf82af94bf887174fe77261f3b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Nov 2022 14:48:39 -0800 Subject: [PATCH 179/207] ext4: fix unaligned memory access in ext4_fc_reserve_space() commit 8415ce07ecf0cc25efdd5db264a7133716e503cf upstream. As is done elsewhere in the file, build the struct ext4_fc_tl on the stack and memcpy() it into the buffer, rather than directly writing it to a potentially-unaligned location in the buffer. Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-6-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index d5ad4b2b235d..892fa7c7a768 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -675,6 +675,15 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) /* Ext4 commit path routines */ +/* memcpy to fc reserved space and update CRC */ +static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, + int len, u32 *crc) +{ + if (crc) + *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); + return memcpy(dst, src, len); +} + /* memzero and update CRC */ static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, u32 *crc) @@ -700,12 +709,13 @@ static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, */ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) { - struct ext4_fc_tl *tl; + struct ext4_fc_tl tl; struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bh; int bsize = sbi->s_journal->j_blocksize; int ret, off = sbi->s_fc_bytes % bsize; int pad_len; + u8 *dst; /* * After allocating len, we should have space at least for a 0 byte @@ 
-729,16 +739,18 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) return sbi->s_fc_bh->b_data + off; } /* Need to add PAD tag */ - tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); - tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); + dst = sbi->s_fc_bh->b_data + off; + tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN; - tl->fc_len = cpu_to_le16(pad_len); - if (crc) - *crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN); - if (pad_len > 0) - ext4_fc_memzero(sb, tl + 1, pad_len, crc); + tl.fc_len = cpu_to_le16(pad_len); + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); + dst += EXT4_FC_TAG_BASE_LEN; + if (pad_len > 0) { + ext4_fc_memzero(sb, dst, pad_len, crc); + dst += pad_len; + } /* Don't leak uninitialized memory in the unused last byte. */ - *((u8 *)(tl + 1) + pad_len) = 0; + *dst = 0; ext4_fc_submit_bh(sb, false); @@ -750,15 +762,6 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) return sbi->s_fc_bh->b_data; } -/* memcpy to fc reserved space and update CRC */ -static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, - int len, u32 *crc) -{ - if (crc) - *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); - return memcpy(dst, src, len); -} - /* * Complete a fast commit by writing tail tag. * From 5439ad45c0d0c8db41eb6f4dce6f778f15a5ee16 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 6 Nov 2022 14:48:40 -0800 Subject: [PATCH 180/207] ext4: fix off-by-one errors in fast-commit block filling commit 48a6a66db82b8043d298a630f22c62d43550cae5 upstream. Due to several different off-by-one errors, or perhaps due to a late change in design that wasn't fully reflected in the code that was actually merged, there are several very strange constraints on how fast-commit blocks are filled with tlv entries: - tlvs must start at least 10 bytes before the end of the block, even though the minimum tlv length is 8. 
Otherwise, the replay code will ignore them. (BUG: ext4_fc_reserve_space() could violate this requirement if called with a len of blocksize - 9 or blocksize - 8. Fortunately, this doesn't seem to happen currently.) - tlvs must end at least 1 byte before the end of the block. Otherwise the replay code will consider them to be invalid. This quirk contributed to a bug (fixed by an earlier commit) where uninitialized memory was being leaked to disk in the last byte of blocks. Also, strangely these constraints don't apply to the replay code in e2fsprogs, which will accept any tlvs in the blocks (with no bounds checks at all, but that is a separate issue...). Given that this all seems to be a bug, let's fix it by just filling blocks with tlv entries in the natural way. Note that old kernels will be unable to replay fast-commit journals created by kernels that have this commit. Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221106224841.279231-7-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fast_commit.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 892fa7c7a768..7ed71c652f67 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -714,43 +714,43 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) struct buffer_head *bh; int bsize = sbi->s_journal->j_blocksize; int ret, off = sbi->s_fc_bytes % bsize; - int pad_len; + int remaining; u8 *dst; /* - * After allocating len, we should have space at least for a 0 byte - * padding. + * If 'len' is too long to fit in any block alongside a PAD tlv, then we + * cannot fulfill the request. 
*/ - if (len + EXT4_FC_TAG_BASE_LEN > bsize) + if (len > bsize - EXT4_FC_TAG_BASE_LEN) return NULL; - if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) { - /* - * Only allocate from current buffer if we have enough space for - * this request AND we have space to add a zero byte padding. - */ - if (!sbi->s_fc_bh) { - ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); - if (ret) - return NULL; - sbi->s_fc_bh = bh; - } - sbi->s_fc_bytes += len; - return sbi->s_fc_bh->b_data + off; + if (!sbi->s_fc_bh) { + ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); + if (ret) + return NULL; + sbi->s_fc_bh = bh; } - /* Need to add PAD tag */ dst = sbi->s_fc_bh->b_data + off; - tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); - pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN; - tl.fc_len = cpu_to_le16(pad_len); - ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); - dst += EXT4_FC_TAG_BASE_LEN; - if (pad_len > 0) { - ext4_fc_memzero(sb, dst, pad_len, crc); - dst += pad_len; + + /* + * Allocate the bytes in the current block if we can do so while still + * leaving enough space for a PAD tlv. + */ + remaining = bsize - EXT4_FC_TAG_BASE_LEN - off; + if (len <= remaining) { + sbi->s_fc_bytes += len; + return dst; } - /* Don't leak uninitialized memory in the unused last byte. */ - *dst = 0; + + /* + * Else, terminate the current block with a PAD tlv, then allocate a new + * block and allocate the bytes at the start of that new block. 
+ */ + + tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); + tl.fc_len = cpu_to_le16(remaining); + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); + ext4_fc_memzero(sb, dst + EXT4_FC_TAG_BASE_LEN, remaining, crc); ext4_fc_submit_bh(sb, false); @@ -758,7 +758,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) if (ret) return NULL; sbi->s_fc_bh = bh; - sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; + sbi->s_fc_bytes += bsize - off + len; return sbi->s_fc_bh->b_data; } @@ -789,7 +789,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) off = sbi->s_fc_bytes % bsize; tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); - tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); + tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail)); sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc); @@ -2056,7 +2056,7 @@ static int ext4_fc_replay_scan(journal_t *journal, state = &sbi->s_fc_replay_state; start = (u8 *)bh->b_data; - end = (__u8 *)bh->b_data + journal->j_blocksize - 1; + end = start + journal->j_blocksize; if (state->fc_replay_expected_off == 0) { state->fc_cur_tag = 0; @@ -2077,7 +2077,7 @@ static int ext4_fc_replay_scan(journal_t *journal, } state->fc_replay_expected_off++; - for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN; + for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; @@ -2195,9 +2195,9 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, #endif start = (u8 *)bh->b_data; - end = (__u8 *)bh->b_data + journal->j_blocksize - 1; + end = start + journal->j_blocksize; - for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN; + for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; From 
9f966e021c20caae639dd0e404c8761e8281a2c4 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 17 Nov 2022 15:36:03 +0800 Subject: [PATCH 181/207] ext4: fix uninititialized value in 'ext4_evict_inode' commit 7ea71af94eaaaf6d9aed24bc94a05b977a741cb9 upstream. Syzbot found the following issue: ===================================================== BUG: KMSAN: uninit-value in ext4_evict_inode+0xdd/0x26b0 fs/ext4/inode.c:180 ext4_evict_inode+0xdd/0x26b0 fs/ext4/inode.c:180 evict+0x365/0x9a0 fs/inode.c:664 iput_final fs/inode.c:1747 [inline] iput+0x985/0xdd0 fs/inode.c:1773 __ext4_new_inode+0xe54/0x7ec0 fs/ext4/ialloc.c:1361 ext4_mknod+0x376/0x840 fs/ext4/namei.c:2844 vfs_mknod+0x79d/0x830 fs/namei.c:3914 do_mknodat+0x47d/0xaa0 __do_sys_mknodat fs/namei.c:3992 [inline] __se_sys_mknodat fs/namei.c:3989 [inline] __ia32_sys_mknodat+0xeb/0x150 fs/namei.c:3989 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 do_fast_syscall_32+0x33/0x70 arch/x86/entry/common.c:203 do_SYSENTER_32+0x1b/0x20 arch/x86/entry/common.c:246 entry_SYSENTER_compat_after_hwframe+0x70/0x82 Uninit was created at: __alloc_pages+0x9f1/0xe80 mm/page_alloc.c:5578 alloc_pages+0xaae/0xd80 mm/mempolicy.c:2285 alloc_slab_page mm/slub.c:1794 [inline] allocate_slab+0x1b5/0x1010 mm/slub.c:1939 new_slab mm/slub.c:1992 [inline] ___slab_alloc+0x10c3/0x2d60 mm/slub.c:3180 __slab_alloc mm/slub.c:3279 [inline] slab_alloc_node mm/slub.c:3364 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc_lru+0x6f3/0xb30 mm/slub.c:3429 alloc_inode_sb include/linux/fs.h:3117 [inline] ext4_alloc_inode+0x5f/0x860 fs/ext4/super.c:1321 alloc_inode+0x83/0x440 fs/inode.c:259 new_inode_pseudo fs/inode.c:1018 [inline] new_inode+0x3b/0x430 fs/inode.c:1046 __ext4_new_inode+0x2a7/0x7ec0 fs/ext4/ialloc.c:959 ext4_mkdir+0x4d5/0x1560 fs/ext4/namei.c:2992 vfs_mkdir+0x62a/0x870 fs/namei.c:4035 do_mkdirat+0x466/0x7b0 fs/namei.c:4060 
__do_sys_mkdirat fs/namei.c:4075 [inline] __se_sys_mkdirat fs/namei.c:4073 [inline] __ia32_sys_mkdirat+0xc4/0x120 fs/namei.c:4073 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 do_fast_syscall_32+0x33/0x70 arch/x86/entry/common.c:203 do_SYSENTER_32+0x1b/0x20 arch/x86/entry/common.c:246 entry_SYSENTER_compat_after_hwframe+0x70/0x82 CPU: 1 PID: 4625 Comm: syz-executor.2 Not tainted 6.1.0-rc4-syzkaller-62821-gcb231e2f67ec #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 ===================================================== Currently, 'ext4_alloc_inode()' does not initialize 'ei->i_flags'. If creating a new inode fails in '__ext4_new_inode()' before 'ei->i_flags' is set, 'iput()' is called on the inode. Since commit 6bc0d63dad7f, 'ext4_evict_inode()' reads 'ei->i_flags', so this path accesses an uninitialized value. To solve the above issue, just initialize 'ei->i_flags' in 'ext4_alloc_inode()'. Reported-by: syzbot+57b25da729eb0b88177d@syzkaller.appspotmail.com Signed-off-by: Ye Bin Fixes: 6bc0d63dad7f ("ext4: remove EA inode entry from mbcache on inode eviction") Reviewed-by: Jan Kara Reviewed-by: Eric Biggers Link: https://lore.kernel.org/r/20221117073603.2598882-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0acfcc5dbf50..aa4f65663fad 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1323,6 +1323,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) return NULL; inode_set_iversion(&ei->vfs_inode, 1); + ei->i_flags = 0; spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); atomic_set(&ei->i_prealloc_active, 0); From 13271fbbe85d73a7c47058f56a52f2a7f00d6e39 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 7 Nov 2022 09:53:35 +0800 Subject: [PATCH 182/207] ext4: init quota for 'old.inode' in 
'ext4_rename' commit fae381a3d79bb94aa2eb752170d47458d778b797 upstream. Syzbot found the following issue: ext4_parse_param: s_want_extra_isize=128 ext4_inode_info_init: s_want_extra_isize=32 ext4_rename: old.inode=ffff88823869a2c8 old.dir=ffff888238699828 new.inode=ffff88823869d7e8 new.dir=ffff888238699828 __ext4_mark_inode_dirty: inode=ffff888238699828 ea_isize=32 want_ea_size=128 __ext4_mark_inode_dirty: inode=ffff88823869a2c8 ea_isize=32 want_ea_size=128 ext4_xattr_block_set: inode=ffff88823869a2c8 ------------[ cut here ]------------ WARNING: CPU: 13 PID: 2234 at fs/ext4/xattr.c:2070 ext4_xattr_block_set.cold+0x22/0x980 Modules linked in: RIP: 0010:ext4_xattr_block_set.cold+0x22/0x980 RSP: 0018:ffff888227d3f3b0 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff88823007a000 RCX: 0000000000000000 RDX: 0000000000000a03 RSI: 0000000000000040 RDI: ffff888230078178 RBP: 0000000000000000 R08: 000000000000002c R09: ffffed1075c7df8e R10: ffff8883ae3efc6b R11: ffffed1075c7df8d R12: 0000000000000000 R13: ffff88823869a2c8 R14: ffff8881012e0460 R15: dffffc0000000000 FS: 00007f350ac1f740(0000) GS:ffff8883ae200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f350a6ed6a0 CR3: 0000000237456000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ext4_xattr_set_entry+0x3b7/0x2320 ? ext4_xattr_block_set+0x0/0x2020 ? ext4_xattr_set_entry+0x0/0x2320 ? ext4_xattr_check_entries+0x77/0x310 ? 
ext4_xattr_ibody_set+0x23b/0x340 ext4_xattr_move_to_block+0x594/0x720 ext4_expand_extra_isize_ea+0x59a/0x10f0 __ext4_expand_extra_isize+0x278/0x3f0 __ext4_mark_inode_dirty.cold+0x347/0x410 ext4_rename+0xed3/0x174f vfs_rename+0x13a7/0x2510 do_renameat2+0x55d/0x920 __x64_sys_rename+0x7d/0xb0 do_syscall_64+0x3b/0xa0 entry_SYSCALL_64_after_hwframe+0x72/0xdc 'ext4_rename' modifies the ctime of 'old.inode' and marks the inode dirty, which may trigger expansion of 'extra_isize' and allocate a block. If quota has not been initialized for the inode, this leads to the warning above. To solve the above issue, initialize quota for 'old.inode' first in 'ext4_rename'. Reported-by: syzbot+98346927678ac3059c77@syzkaller.appspotmail.com Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221107015335.2524319-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a789ea9b61a0..1c5518a4bdf9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3796,6 +3796,9 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, return -EXDEV; retval = dquot_initialize(old.dir); + if (retval) + return retval; + retval = dquot_initialize(old.inode); if (retval) return retval; retval = dquot_initialize(new.dir); From b753b0be45ae3eb4a37a6a9efb69a3ba486b81ea Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Nov 2022 12:16:34 -0800 Subject: [PATCH 183/207] ext4: don't fail GETFSUUID when the caller provides a long buffer commit a7e9d977e031fceefe1e7cd69ebd7202d5758b56 upstream. If userspace provides a longer UUID buffer than is required, we shouldn't fail the call with EINVAL -- rather, we can fill the caller's buffer with the bytes we /can/ fill, and update the length field to reflect what we copied. 
This doesn't break the UAPI since we're enabling a case that currently fails, and so far Ted hasn't released a version of e2fsprogs that uses the new ext4 ioctl. Signed-off-by: Darrick J. Wong Reviewed-by: Catherine Hoang Link: https://lore.kernel.org/r/166811139478.327006.13879198441587445544.stgit@magnolia Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e5f60057db5b..5a12f87df45e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1159,14 +1159,16 @@ static int ext4_ioctl_getuuid(struct ext4_sb_info *sbi, return -EINVAL; } - if (fsuuid.fsu_len != UUID_SIZE || fsuuid.fsu_flags != 0) + if (fsuuid.fsu_len < UUID_SIZE || fsuuid.fsu_flags != 0) return -EINVAL; lock_buffer(sbi->s_sbh); memcpy(uuid, sbi->s_es->s_uuid, UUID_SIZE); unlock_buffer(sbi->s_sbh); - if (copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE)) + fsuuid.fsu_len = UUID_SIZE; + if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid)) || + copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE)) return -EFAULT; return 0; } From 81b915181c630ee1cffa052e52874fe4e1ba91ac Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Thu, 17 Nov 2022 10:22:07 -0500 Subject: [PATCH 184/207] ext4: fix delayed allocation bug in ext4_clu_mapped for bigalloc + inline commit 131294c35ed6f777bd4e79d42af13b5c41bf2775 upstream. When converting files with inline data to extents, delayed allocations made on a file system created with both the bigalloc and inline options can result in invalid extent status cache content, incorrect reserved cluster counts, kernel memory leaks, and potential kernel panics. With bigalloc, the code that determines whether a block must be delayed allocated searches the extent tree to see if that block maps to a previously allocated cluster. If not, the block is delayed allocated, and otherwise, it isn't. 
However, if the inline option is also used, and if the file containing the block is marked as able to store data inline, there isn't a valid extent tree associated with the file. The current code in ext4_clu_mapped() calls ext4_find_extent() to search the non-existent tree for a previously allocated cluster anyway, which typically finds nothing, as desired. However, a side effect of the search can be to cache invalid content from the non-existent tree (garbage) in the extent status tree, including bogus entries in the pending reservation tree. To fix this, avoid searching the extent tree when allocating blocks for bigalloc + inline files that are being converted from inline to extent mapped. Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20221117152207.2424-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6c399a8b22b3..36225ef56b0c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5799,6 +5799,14 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) struct ext4_extent *extent; ext4_lblk_t first_lblk, first_lclu, last_lclu; + /* + * if data can be stored inline, the logical cluster isn't + * mapped - no physical clusters have been allocated, and the + * file has no extents + */ + if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) + return 0; + /* search for the extent closest to the first block in the cluster */ path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0); if (IS_ERR(path)) { From c99932c83074ef9320cac48aab085adb1a1144c3 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 17 Nov 2022 12:03:41 +0800 Subject: [PATCH 185/207] ext4: fix corruption when online resizing a 1K bigalloc fs commit 0aeaa2559d6d53358fca3e3fce73807367adca74 upstream. 
When a backup superblock is updated in update_backups(), the primary superblock's offset in the group (that is, sbi->s_sbh->b_blocknr) is used as the backup superblock's offset in its group. However, when the block size is 1K and bigalloc is enabled, the two offsets are not equal. This causes the backup group descriptors to be overwritten by the superblock in update_backups(). Moreover, if meta_bg is enabled, the file system will be corrupted because this feature uses backup group descriptors. To solve this issue, we use a more accurate ext4_group_first_block_no() as the offset of the backup superblock in its group. Fixes: d77147ff443b ("ext4: add support for online resizing with bigalloc") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20221117040341.1380702-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 46b87ffeb304..5db99cf73ba7 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1596,8 +1596,8 @@ exit_journal: int meta_bg = ext4_has_feature_meta_bg(sb); sector_t old_gdb = 0; - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block), 0); + update_backups(sb, ext4_group_first_block_no(sb, 0), + (char *)es, sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; @@ -1808,7 +1808,7 @@ errout: if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, + update_backups(sb, ext4_group_first_block_no(sb, 0), (char *)es, sizeof(struct ext4_super_block), 0); } return err; From dad6a26adb5ae452cc771f937b268cd6bf1de778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Wed, 9 Nov 2022 18:14:45 +0000 Subject: [PATCH 186/207] ext4: fix 
error code return to user-space in ext4_get_branch() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 26d75a16af285a70863ba6a81f85d81e7e65da50 upstream. If a block is out of range in ext4_get_branch(), -ENOMEM will be returned to user-space. Obviously, this error code isn't really useful. This patch fixes it by making sure the right error code (-EFSCORRUPTED) is propagated to user-space. EUCLEAN is more informative than ENOMEM. Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20221109181445.17843-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/indirect.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 860fc5119009..c68bebe7ff4b 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, struct super_block *sb = inode->i_sb; Indirect *p = chain; struct buffer_head *bh; + unsigned int key; int ret = -EIO; *err = 0; @@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, if (!p->key) goto no_block; while (--depth) { - bh = sb_getblk(sb, le32_to_cpu(p->key)); + key = le32_to_cpu(p->key); + if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) { + /* the block was out of range */ + ret = -EFSCORRUPTED; + goto failure; + } + bh = sb_getblk(sb, key); if (unlikely(!bh)) { ret = -ENOMEM; goto failure; From a77f3bdb03d0b2a8f4a3838a3120a6e56170a8bc Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 17 Nov 2022 12:03:39 +0800 Subject: [PATCH 187/207] ext4: fix bad checksum after online resize commit a408f33e895e455f16cf964cb5cd4979b658db7b upstream. When online resizing is performed twice consecutively, the error message "Superblock checksum does not match superblock" is displayed for the second time. 
Here's the reproducer: mkfs.ext4 -F /dev/sdb 100M mount /dev/sdb /tmp/test resize2fs /dev/sdb 5G resize2fs /dev/sdb 6G To solve this issue, we move the checksum update to after es->s_overhead_clusters is updated. Fixes: 026d0d27c488 ("ext4: reduce computation of overhead during resize") Fixes: de394a86658f ("ext4: update s_overhead_clusters in the superblock during an on-line resize") Signed-off-by: Baokun Li Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20221117040341.1380702-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5db99cf73ba7..3c1a74ae68b8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1476,8 +1476,6 @@ static void ext4_update_super(struct super_block *sb, * active. */ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + reserved_blocks); - ext4_superblock_csum_set(sb); - unlock_buffer(sbi->s_sbh); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, @@ -1513,6 +1511,8 @@ static void ext4_update_super(struct super_block *sb, ext4_calculate_overhead(sb); es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, From 627dd452889d5211342accb3cb7a4625f865898b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Nov 2022 12:16:29 -0800 Subject: [PATCH 188/207] ext4: dont return EINVAL from GETFSUUID when reporting UUID length commit b76abb5157468756163fe7e3431c9fe32cba57ca upstream. If userspace calls this ioctl with fsu_len (the length of the fsuuid.fsu_uuid array) set to zero, ext4 copies the desired uuid length out to userspace. 
The kernel call returned a result from a valid input, so the return value here should be zero, not EINVAL. While we're at it, fix the copy_to_user call to make it clear that we're only copying out fsu_len. Signed-off-by: Darrick J. Wong Reviewed-by: Catherine Hoang Link: https://lore.kernel.org/r/166811138914.327006.9241306894437166566.stgit@magnolia Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5a12f87df45e..202953b5db49 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1154,9 +1154,10 @@ static int ext4_ioctl_getuuid(struct ext4_sb_info *sbi, if (fsuuid.fsu_len == 0) { fsuuid.fsu_len = UUID_SIZE; - if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid.fsu_len))) + if (copy_to_user(&ufsuuid->fsu_len, &fsuuid.fsu_len, + sizeof(fsuuid.fsu_len))) return -EFAULT; - return -EINVAL; + return 0; } if (fsuuid.fsu_len < UUID_SIZE || fsuuid.fsu_flags != 0) From 1008bbaadca6559f6f67fd9cb2adba80b0a125ab Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 17 Nov 2022 12:03:40 +0800 Subject: [PATCH 189/207] ext4: fix corrupt backup group descriptors after online resize commit 8f49ec603ae3e213bfab2799182724e3abac55a1 upstream. In commit 9a8c5b0d0615 ("ext4: update the backup superblock's at the end of the online resize"), it is assumed that update_backups() only updates backup superblocks, so each b_data is treated as a backup superblock to update its s_block_group_nr and s_checksum. However, update_backups() also updates the backup group descriptors, which causes the backup group descriptors to be corrupted. The above commit fixes the problem of invalid checksum of the backup superblock. The root cause of this problem is that the checksum of ext4_update_super() is not set correctly. This problem has been fixed in the previous patch ("ext4: fix bad checksum after online resize"). 
However, we do need to set block_group_nr for the backup superblock in update_backups(). When a block is in a group that contains a backup superblock, and the block is the first block in the group, the block is definitely a superblock. We add a helper function that includes setting s_block_group_nr and updating checksum, and then call it only when the above conditions are met to prevent the backup group descriptors from being incorrectly modified. Fixes: 9a8c5b0d0615 ("ext4: update the backup superblock's at the end of the online resize") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20221117040341.1380702-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3c1a74ae68b8..b493233750ab 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1110,6 +1110,16 @@ exit_free: return err; } +static inline void ext4_set_block_group_nr(struct super_block *sb, char *data, + ext4_group_t group) +{ + struct ext4_super_block *es = (struct ext4_super_block *) data; + + es->s_block_group_nr = cpu_to_le16(group); + if (ext4_has_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(sb, es); +} + /* * Update the backup copies of the ext4 metadata. 
These don't need to be part * of the main resize transaction, because e2fsck will re-write them if there @@ -1158,7 +1168,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, while (group < sbi->s_groups_count) { struct buffer_head *bh; ext4_fsblk_t backup_block; - struct ext4_super_block *es; + int has_super = ext4_bg_has_super(sb, group); + ext4_fsblk_t first_block = ext4_group_first_block_no(sb, group); /* Out of journal space, and can't get more - abort - so sad */ err = ext4_resize_ensure_credits_batch(handle, 1); @@ -1168,8 +1179,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, if (meta_bg == 0) backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; else - backup_block = (ext4_group_first_block_no(sb, group) + - ext4_bg_has_super(sb, group)); + backup_block = first_block + has_super; bh = sb_getblk(sb, backup_block); if (unlikely(!bh)) { @@ -1187,10 +1197,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); - es = (struct ext4_super_block *) bh->b_data; - es->s_block_group_nr = cpu_to_le16(group); - if (ext4_has_metadata_csum(sb)) - es->s_checksum = ext4_superblock_csum(sb, es); + if (has_super && (backup_block == first_block)) + ext4_set_block_group_nr(sb, bh->b_data, group); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, NULL, bh); From 52cdfab923db7d69ea5020b9818fd4e261d39eab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 21 Nov 2022 14:09:29 +0100 Subject: [PATCH 190/207] ext4: avoid BUG_ON when creating xattrs commit b40ebaf63851b3a401b0dc9263843538f64f5ce6 upstream. Commit fb0a387dcdcd ("ext4: limit block allocations for indirect-block files to < 2^32") added code to try to allocate xattr block with 32-bit block number for indirect block based files on the grounds that these files cannot use larger block numbers. 
It also added BUG_ON when allocated block could not fit into 32 bits. This is however bogus reasoning because xattr block is stored in inode->i_file_acl and inode->i_file_acl_hi and as such even indirect block based files can happily use full 48 bits for xattr block number. The proper handling seems to be there basically since 64-bit block number support was added. So remove the bogus limitation and BUG_ON. Cc: Eric Sandeen Fixes: fb0a387dcdcd ("ext4: limit block allocations for indirect-block files to < 2^32") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20221121130929.32031-1-jack@suse.cz Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 36d6ba7190b6..800ce5cdb9d2 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2070,19 +2070,11 @@ inserted: goal = ext4_group_first_block_no(sb, EXT4_I(inode)->i_block_group); - - /* non-extent files can't have physical blocks past 2^32 */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - block = ext4_new_meta_blocks(handle, inode, goal, 0, NULL, &error); if (error) goto cleanup; - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); - ea_idebug(inode, "creating block %llu", (unsigned long long)block); From cc1538c693d25e282bed8c54b65c914a04023a78 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2022 20:39:50 +0100 Subject: [PATCH 191/207] ext4: fix deadlock due to mbcache entry corruption commit a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd upstream. When manipulating xattr blocks, we can deadlock infinitely looping inside ext4_xattr_block_set() where we constantly keep finding xattr block for reuse in mbcache but we are unable to reuse it because its reference count is too big. 
This happens because cache entry for the xattr block is marked as reusable (e_reusable set) although its reference count is too big. When this inconsistency happens, this inconsistent state is kept indefinitely and so ext4_xattr_block_set() keeps retrying indefinitely. The inconsistent state is caused by non-atomic update of e_reusable bit. e_reusable is part of a bitfield and e_reusable update can race with update of e_referenced bit in the same bitfield resulting in loss of one of the updates. Fix the problem by using atomic bitops instead. This bug has been around for many years, but it became *much* easier to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr blocks"). Cc: stable@vger.kernel.org Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries") Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks") Reported-and-tested-by: Jeremi Piotrowski Reported-by: Thilo Fromm Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/ Signed-off-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 4 ++-- fs/mbcache.c | 14 ++++++++------ include/linux/mbcache.h | 9 +++++++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 800ce5cdb9d2..08043aa72cf1 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1281,7 +1281,7 @@ retry_ref: ce = mb_cache_entry_get(ea_block_cache, hash, bh->b_blocknr); if (ce) { - ce->e_reusable = 1; + set_bit(MBE_REUSABLE_B, &ce->e_flags); mb_cache_entry_put(ea_block_cache, ce); } } @@ -2042,7 +2042,7 @@ inserted: } BHDR(new_bh)->h_refcount = cpu_to_le32(ref); if (ref == EXT4_XATTR_REFCOUNT_MAX) - ce->e_reusable = 0; + clear_bit(MBE_REUSABLE_B, &ce->e_flags); ea_bdebug(new_bh, "reusing; refcount now=%d", ref); ext4_xattr_block_csum_set(inode, new_bh); diff --git a/fs/mbcache.c 
b/fs/mbcache.c index e272ad738faf..2a4b8b549e93 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -100,8 +100,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, atomic_set(&entry->e_refcnt, 2); entry->e_key = key; entry->e_value = value; - entry->e_reusable = reusable; - entry->e_referenced = 0; + entry->e_flags = 0; + if (reusable) + set_bit(MBE_REUSABLE_B, &entry->e_flags); head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { @@ -165,7 +166,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, while (node) { entry = hlist_bl_entry(node, struct mb_cache_entry, e_hash_list); - if (entry->e_key == key && entry->e_reusable && + if (entry->e_key == key && + test_bit(MBE_REUSABLE_B, &entry->e_flags) && atomic_inc_not_zero(&entry->e_refcnt)) goto out; node = node->next; @@ -284,7 +286,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get); void mb_cache_entry_touch(struct mb_cache *cache, struct mb_cache_entry *entry) { - entry->e_referenced = 1; + set_bit(MBE_REFERENCED_B, &entry->e_flags); } EXPORT_SYMBOL(mb_cache_entry_touch); @@ -309,9 +311,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, entry = list_first_entry(&cache->c_list, struct mb_cache_entry, e_list); /* Drop initial hash reference if there is no user */ - if (entry->e_referenced || + if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) { - entry->e_referenced = 0; + clear_bit(MBE_REFERENCED_B, &entry->e_flags); list_move_tail(&entry->e_list, &cache->c_list); continue; } diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 2da63fd7b98f..97e64184767d 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -10,6 +10,12 @@ struct mb_cache; +/* Cache entry flags */ +enum { + MBE_REFERENCED_B = 0, + MBE_REUSABLE_B +}; + struct mb_cache_entry { /* List of entries in cache - protected by cache->c_list_lock */ struct 
list_head e_list; @@ -26,8 +32,7 @@ struct mb_cache_entry { atomic_t e_refcnt; /* Key in hash - stable during lifetime of the entry */ u32 e_key; - u32 e_referenced:1; - u32 e_reusable:1; + unsigned long e_flags; /* User provided value - stable during lifetime of the entry */ u64 e_value; }; From 74ba281971618a76d5067cad7d8b14d549da5e9a Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 6 Dec 2022 22:41:34 +0800 Subject: [PATCH 192/207] ext4: fix kernel BUG in 'ext4_write_inline_data_end()' commit 5c099c4fdc438014d5893629e70a8ba934433ee8 upstream. Syzbot report follow issue: ------------[ cut here ]------------ kernel BUG at fs/ext4/inline.c:227! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3629 Comm: syz-executor212 Not tainted 6.1.0-rc5-syzkaller-00018-g59d0d52c30d4 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:ext4_write_inline_data+0x344/0x3e0 fs/ext4/inline.c:227 RSP: 0018:ffffc90003b3f368 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8880704e16c0 RCX: 0000000000000000 RDX: ffff888021763a80 RSI: ffffffff821e31a4 RDI: 0000000000000006 RBP: 000000000006818e R08: 0000000000000006 R09: 0000000000068199 R10: 0000000000000079 R11: 0000000000000000 R12: 000000000000000b R13: 0000000000068199 R14: ffffc90003b3f408 R15: ffff8880704e1c82 FS: 000055555723e3c0(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffe8ac9080 CR3: 0000000079f81000 CR4: 0000000000350ee0 Call Trace: ext4_write_inline_data_end+0x2a3/0x12f0 fs/ext4/inline.c:768 ext4_write_end+0x242/0xdd0 fs/ext4/inode.c:1313 ext4_da_write_end+0x3ed/0xa30 fs/ext4/inode.c:3063 generic_perform_write+0x316/0x570 mm/filemap.c:3764 ext4_buffered_write_iter+0x15b/0x460 fs/ext4/file.c:285 ext4_file_write_iter+0x8bc/0x16e0 fs/ext4/file.c:700 call_write_iter include/linux/fs.h:2191 [inline] do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 do_iter_write+0x182/0x700 fs/read_write.c:861 
vfs_iter_write+0x74/0xa0 fs/read_write.c:902 iter_file_splice_write+0x745/0xc90 fs/splice.c:686 do_splice_from fs/splice.c:764 [inline] direct_splice_actor+0x114/0x180 fs/splice.c:931 splice_direct_to_actor+0x335/0x8a0 fs/splice.c:886 do_splice_direct+0x1ab/0x280 fs/splice.c:974 do_sendfile+0xb19/0x1270 fs/read_write.c:1255 __do_sys_sendfile64 fs/read_write.c:1323 [inline] __se_sys_sendfile64 fs/read_write.c:1309 [inline] __x64_sys_sendfile64+0x1d0/0x210 fs/read_write.c:1309 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd ---[ end trace 0000000000000000 ]--- Above issue may happens as follows: ext4_da_write_begin ext4_da_write_inline_data_begin ext4_da_convert_inline_data_to_extent ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ext4_da_write_end ext4_run_li_request ext4_mb_prefetch ext4_read_block_bitmap_nowait ext4_validate_block_bitmap ext4_mark_group_bitmap_corrupted(sb, block_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT) percpu_counter_sub(&sbi->s_freeclusters_counter,grp->bb_free); -> sbi->s_freeclusters_counter become zero ext4_da_write_begin if (ext4_nonda_switch(inode->i_sb)) -> As freeclusters_counter is zero will return true *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; ext4_write_begin ext4_da_write_end if (write_mode == FALL_BACK_TO_NONDELALLOC) ext4_write_end if (inline_data) ext4_write_inline_data_end ext4_write_inline_data BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); -> As inode is already convert to extent, so 'pos + len' > inline_size -> then trigger BUG. To solve this issue, instead of checking ext4_has_inline_data() which is only cleared after data has been written back, check the EXT4_STATE_MAY_INLINE_DATA flag in ext4_write_end(). 
Fixes: f19d5870cbf7 ("ext4: add normal write support for inline data") Reported-by: syzbot+4faa160fa96bfba639f8@syzkaller.appspotmail.com Reported-by: Jun Nie Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20221206144134.1919987-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 181bc161b1ac..a0f4d4197a0b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1315,7 +1315,8 @@ static int ext4_write_end(struct file *file, trace_ext4_write_end(inode, pos, len, copied); - if (ext4_has_inline_data(inode)) + if (ext4_has_inline_data(inode) && + ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) return ext4_write_inline_data_end(inode, pos, len, copied, page); copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); From 70e5b46beba64706430a87a6d516054225e8ac8a Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 8 Dec 2022 10:32:33 +0800 Subject: [PATCH 193/207] ext4: fix inode leak in ext4_xattr_inode_create() on an error path commit e4db04f7d3dbbe16680e0ded27ea2a65b10f766a upstream. The following issue occurs when doing setxattr with fault injection: [localhost]# fsck.ext4 -fn /dev/sda e2fsck 1.46.6-rc1 (12-Sep-2022) Pass 1: Checking inodes, blocks, and sizes Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Unattached zero-length inode 15. Clear? no Unattached inode 15 Connect to /lost+found? no Pass 5: Checking group summary information /dev/sda: ********** WARNING: Filesystem still has errors ********** /dev/sda: 15/655360 files (0.0% non-contiguous), 66755/2621440 blocks This occurs in 'ext4_xattr_inode_create()'. If 'ext4_mark_inode_dirty()' fails, the inode's i_nlink must be dropped; otherwise the inode is leaked.
Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221208023233.1231330-5-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 08043aa72cf1..2308ed061f97 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1441,6 +1441,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle, if (!err) err = ext4_inode_attach_jinode(ea_inode); if (err) { + if (ext4_xattr_inode_dec_ref(handle, ea_inode)) + ext4_warning_inode(ea_inode, + "cleanup dec ref error %d", err); iput(ea_inode); return ERR_PTR(err); } From 0f860f71b61776d03817ed8917abea8a2315b544 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 Dec 2022 12:59:27 +0100 Subject: [PATCH 194/207] ext4: initialize quota before expanding inode in setproject ioctl commit 1485f726c6dec1a1f85438f2962feaa3d585526f upstream. Make sure we initialize quotas before possibly expanding inode space (and thus maybe needing to allocate external xattr block) in ext4_ioctl_setproject(). This prevents not accounting the necessary block allocation. 
Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20221207115937.26601-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 202953b5db49..8067ccda34e4 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -732,6 +732,10 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) if (ext4_is_quota_file(inode)) return err; + err = dquot_initialize(inode); + if (err) + return err; + err = ext4_get_inode_loc(inode, &iloc); if (err) return err; @@ -747,10 +751,6 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) brelse(iloc.bh); } - err = dquot_initialize(inode); - if (err) - return err; - handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + EXT4_QUOTA_DEL_BLOCKS(sb) + 3); From 56ecd5509fdc76af2b70a9090af15829c21d04b2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 Dec 2022 12:59:28 +0100 Subject: [PATCH 195/207] ext4: avoid unaccounted block allocation when expanding inode commit 8994d11395f8165b3deca1971946f549f0822630 upstream. When expanding inode space in ext4_expand_extra_isize_ea() we may need to allocate external xattr block. If quota is not initialized for the inode, the block allocation will not be accounted into quota usage. Make sure the quota is initialized before we try to expand inode space. 
Reported-by: Pengfei Xu Link: https://lore.kernel.org/all/Y5BT+k6xWqthZc1P@xpf.sh.intel.com Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20221207115937.26601-2-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a0f4d4197a0b..283afda26d9c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5875,6 +5875,14 @@ static int __ext4_expand_extra_isize(struct inode *inode, return 0; } + /* + * We may need to allocate external xattr block so we need quotas + * initialized. Here we can be called with various locks held so we + * cannot affort to initialize quotas ourselves. So just bail. + */ + if (dquot_initialize_needed(inode)) + return -EAGAIN; + /* try to expand with EAs present */ error = ext4_expand_extra_isize_ea(inode, new_extra_isize, raw_inode, handle); From f06a3cff1b4e4b1b5edfd5031aa58e679064db01 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 8 Dec 2022 10:32:31 +0800 Subject: [PATCH 196/207] ext4: allocate extended attribute value in vmalloc area commit cc12a6f25e07ed05d5825a1664b67a970842b2ca upstream. Now, extended attribute value maximum length is 64K. The memory requested here does not need continuous physical addresses, so it is appropriate to use kvmalloc to request memory. At the same time, it can also cope with the situation that the extended attribute will become longer in the future. 
Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221208023233.1231330-3-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2308ed061f97..866772a2e068 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2550,7 +2550,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); - buffer = kmalloc(value_size, GFP_NOFS); + buffer = kvmalloc(value_size, GFP_NOFS); b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); if (!is || !bs || !buffer || !b_entry_name) { error = -ENOMEM; @@ -2602,7 +2602,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, error = 0; out: kfree(b_entry_name); - kfree(buffer); + kvfree(buffer); if (is) brelse(is->iloc.bh); if (bs) From 218f8fe668240f2ec95dcb000f61904dcdc83271 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 12 Dec 2022 17:19:58 +0000 Subject: [PATCH 197/207] drm/i915/ttm: consider CCS for backup objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ad0fca2dceeab8fdd8e1135f4b4ef2dc46c2ead9 upstream. It seems we can have one or more framebuffers that are still pinned when suspending lmem, in such a case we end up creating a shmem backup object, instead of evicting the object directly, but this will skip copying the CCS aux state, since we don't allocate the extra storage for the CCS pages as part of the ttm_tt construction. Since we can already deal with pinned objects just fine, it doesn't seem too nasty to just extend to support dealing with the CCS aux state, if the object is a pinned framebuffer. This fixes display corruption (like in gnome-shell) seen on DG2 when returning from suspend. 
Fixes: da0595ae91da ("drm/i915/migrate: Evict and restore the flatccs capable lmem obj") Signed-off-by: Matthew Auld Cc: Ville Syrjälä Cc: Nirmoy Das Cc: Andrzej Hajda Cc: Shuicheng Lin Cc: # v5.19+ Tested-by: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20221212171958.82593-2-matthew.auld@intel.com (cherry picked from commit 95df9cc24bee8a09d39c62bcef4319b984814e18) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 3 +++ .../gpu/drm/i915/gem/i915_gem_object_types.h | 10 ++++++---- drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 18 +++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 369006c5317f..a40bc17acead 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -761,6 +761,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) return false; + if (obj->flags & I915_BO_ALLOC_CCS_AUX) + return true; + for (i = 0; i < obj->mm.n_placements; i++) { /* Compression is not allowed for the objects with smem placement */ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d0d6772e6f36..ab4c2f90a564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -327,16 +327,18 @@ struct drm_i915_gem_object { * dealing with userspace objects the CPU fault handler is free to ignore this. 
*/ #define I915_BO_ALLOC_GPU_ONLY BIT(6) +#define I915_BO_ALLOC_CCS_AUX BIT(7) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ I915_BO_ALLOC_USER | \ I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ - I915_BO_ALLOC_GPU_ONLY) -#define I915_BO_READONLY BIT(7) -#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(9) + I915_BO_ALLOC_GPU_ONLY | \ + I915_BO_ALLOC_CCS_AUX) +#define I915_BO_READONLY BIT(8) +#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(10) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 07e49f22f2de..7e67742bc65e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -50,6 +50,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, container_of(bo->bdev, typeof(*i915), bdev); struct drm_i915_gem_object *backup; struct ttm_operation_ctx ctx = {}; + unsigned int flags; int err = 0; if (bo->resource->mem_type == I915_PL_SYSTEM || obj->ttm.backup) @@ -65,7 +66,22 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, if (obj->flags & I915_BO_ALLOC_PM_VOLATILE) return 0; - backup = i915_gem_object_create_shmem(i915, obj->base.size); + /* + * It seems that we might have some framebuffers still pinned at this + * stage, but for such objects we might also need to deal with the CCS + * aux state. Make sure we force the save/restore of the CCS state, + * otherwise we might observe display corruption, when returning from + * suspend. 
+ */ + flags = 0; + if (i915_gem_object_needs_ccs_pages(obj)) { + WARN_ON_ONCE(!i915_gem_object_is_framebuffer(obj)); + WARN_ON_ONCE(!pm_apply->allow_gpu); + + flags = I915_BO_ALLOC_CCS_AUX; + } + backup = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + obj->base.size, 0, flags); if (IS_ERR(backup)) return PTR_ERR(backup); From 3038224f5617bb4628170b29661645138f884292 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 8 Dec 2022 11:55:15 +0800 Subject: [PATCH 198/207] drm/amd/display: Add DCN314 display SG Support commit fe6872adb05e85bde38f2cdec01a0f4cfb826998 upstream. Add display SG support for DCN 3.1.4. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 512c32327eb1..c2c26fbea512 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1512,6 +1512,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; From 9724e6950ef902abd6abc5b5dfacca4b5125ba75 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Nov 2022 15:52:19 -0500 Subject: [PATCH 199/207] drm/amdgpu: handle polaris10/11 overlap asics (v2) commit 1d4624cd72b912b2680c08d0be48338a1629a858 upstream. Some special polaris 10 chips overlap with the polaris11 DID range. Handle this properly in the driver. v2: use local flags for other function calls. 
Acked-by: Luben Tuikov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bf2d50c8c92a..d8dfbb9b735d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2040,6 +2040,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, "See modparam exp_hw_support\n"); return -ENODEV; } + /* differentiate between P10 and P11 asics with the same DID */ + if (pdev->device == 0x67FF && + (pdev->revision == 0xE3 || + pdev->revision == 0xE7 || + pdev->revision == 0xF3 || + pdev->revision == 0xF7)) { + flags &= ~AMD_ASIC_MASK; + flags |= CHIP_POLARIS10; + } /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, * however, SME requires an indirect IOMMU mapping because the encryption @@ -2109,12 +2118,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); - ret = amdgpu_driver_load_kms(adev, ent->driver_data); + ret = amdgpu_driver_load_kms(adev, flags); if (ret) goto err_pci; retry_init: - ret = drm_dev_register(ddev, ent->driver_data); + ret = drm_dev_register(ddev, flags); if (ret == -EAGAIN && ++retry <= 3) { DRM_INFO("retry init %d\n", retry); /* Don't request EX mode too frequently which is attacking */ From 52beaa0938ad33f3ee848c22935af967e3cce005 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Dec 2022 11:08:53 -0500 Subject: [PATCH 200/207] drm/amdgpu: make display pinning more flexible (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 81d0bcf9900932633d270d5bc4a54ff599c6ebdb upstream. Only apply the static threshold for Stoney and Carrizo. This hardware has certain requirements that don't allow mixing of GTT and VRAM. 
Newer asics do not have these requirements so we should be able to be more flexible with where buffers end up. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2270 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2291 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2255 Acked-by: Luben Tuikov Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2e8f6cd7a729..3df13d841e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1509,7 +1509,8 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain) { - if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) && + ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) { domain = AMDGPU_GEM_DOMAIN_VRAM; if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) domain = AMDGPU_GEM_DOMAIN_GTT; From ea62bd769994d6f18bd265cc156e6182a288c880 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 16 Dec 2022 11:34:56 +0000 Subject: [PATCH 201/207] drm/i915: improve the catch-all evict to handle lock contention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3f882f2d4f689627c1566c2c92087bc3ff734953 upstream. The catch-all evict can fail due to object lock contention, since it only goes as far as trylocking the object, due to us already holding the vm->mutex. Doing a full object lock here can deadlock, since the vm->mutex is always our inner lock. 
Add another execbuf pass which drops the vm->mutex and then tries to grab the object with the full lock, before then retrying the eviction. This should be good enough for now to fix the immediate regression with userspace seeing -ENOSPC from execbuf due to contended object locks during GTT eviction. v2 (Mani) - Also revamp the docs for the different passes. Testcase: igt@gem_ppgtt@shrink-vs-evict-* Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627 References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570 References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558 Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Thomas Hellström Cc: Tvrtko Ursulin Cc: Andrzej Hajda Cc: Mani Milani Cc: # v5.18+ Reviewed-by: Mani Milani Tested-by: Mani Milani Link: https://patchwork.freedesktop.org/patch/msgid/20221216113456.414183-1-matthew.auld@intel.com (cherry picked from commit 801fa7a81f6da533cc5442fc40e32c72b76cd42a) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 59 +++++++++++++++---- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++---- drivers/gpu/drm/i915/i915_gem_evict.h | 4 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- 6 files changed, 82 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 845023c14eb3..f461e34cc5f0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -729,37 +729,74 @@ static int eb_reserve(struct i915_execbuffer *eb) bool unpinned; /* - * Attempt to pin all of the buffers into the GTT.
- * This is done in 2 phases: + * We have one more buffers that we couldn't bind, which could be due to + * various reasons. To resolve this we have 4 passes, with every next + * level turning the screws tighter: * - * 1. Unbind all objects that do not match the GTT constraints for - * the execbuffer (fenceable, mappable, alignment etc). - * 2. Bind new objects. + * 0. Unbind all objects that do not match the GTT constraints for the + * execbuffer (fenceable, mappable, alignment etc). Bind all new + * objects. This avoids unnecessary unbinding of later objects in order + * to make room for the earlier objects *unless* we need to defragment. * - * This avoid unnecessary unbinding of later objects in order to make - * room for the earlier objects *unless* we need to defragment. + * 1. Reorder the buffers, where objects with the most restrictive + * placement requirements go first (ignoring fixed location buffers for + * now). For example, objects needing the mappable aperture (the first + * 256M of GTT), should go first vs objects that can be placed just + * about anywhere. Repeat the previous pass. * - * Defragmenting is skipped if all objects are pinned at a fixed location. + * 2. Consider buffers that are pinned at a fixed location. Also try to + * evict the entire VM this time, leaving only objects that we were + * unable to lock. Try again to bind the buffers. (still using the new + * buffer order). + * + * 3. We likely have object lock contention for one or more stubborn + * objects in the VM, for which we need to evict to make forward + * progress (perhaps we are fighting the shrinker?). When evicting the + * VM this time around, anything that we can't lock we now track using + * the busy_bo, using the full lock (after dropping the vm->mutex to + * prevent deadlocks), instead of trylock. We then continue to evict the + * VM, this time with the stubborn object locked, which we can now + * hopefully unbind (if still bound in the VM). 
Repeat until the VM is + * evicted. Finally we should be able bind everything. */ - for (pass = 0; pass <= 2; pass++) { + for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK; if (pass >= 1) - unpinned = eb_unbind(eb, pass == 2); + unpinned = eb_unbind(eb, pass >= 2); if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) { - err = i915_gem_evict_vm(eb->context->vm, &eb->ww); + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); mutex_unlock(&eb->context->vm->mutex); } if (err) return err; } + if (pass == 3) { +retry: + err = mutex_lock_interruptible(&eb->context->vm->mutex); + if (!err) { + struct drm_i915_gem_object *busy_bo = NULL; + + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); + mutex_unlock(&eb->context->vm->mutex); + if (err && busy_bo) { + err = i915_gem_object_lock(busy_bo, &eb->ww); + i915_gem_object_put(busy_bo); + if (!err) + goto retry; + } + } + if (err) + return err; + } + list_for_each_entry(ev, &eb->unbound, bind_link) { err = eb_reserve_vma(eb, ev, pin_flags); if (err) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index e63329bc8065..354c1d6dab84 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ retry: if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) { - ret = i915_gem_evict_vm(&ggtt->vm, &ww); + ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f025ee4fa526..a4b4d9b7d26c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * @vm: Address space to cleanse * @ww: An optional struct i915_gem_ww_ctx. 
If not NULL, i915_gem_evict_vm * will be able to evict vma's locked by the ww as well. + * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then + * in the event i915_gem_evict_vm() is unable to trylock an object for eviction, + * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop + * the vm->mutex, before trying again to acquire the contended lock. The caller + * also owns a reference to the object. * * This function evicts all vmas from a vm. * @@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo) { int ret = 0; @@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. 
*/ - if (dying_vma(vma) || + if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; } - if (!i915_gem_object_trylock(vma->obj, ww)) + if (!i915_gem_object_trylock(vma->obj, ww)) { + if (busy_bo) { + *busy_bo = vma->obj; /* holds ref */ + ret = -EBUSY; + break; + } + i915_gem_object_put(vma->obj); continue; + } __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); @@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break; - ret = 0; /* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } + if (!dying_vma(vma)) + i915_gem_object_put(vma->obj); } list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" 
*/ + ret = 0; + } i915_gem_object_unlock(vma->obj); + i915_gem_object_put(vma->obj); } } while (ret == 0); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object; int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, - struct i915_gem_ww_ctx *ww); + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo); #endif /* __I915_GEM_EVICT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index f17c09ead7d7..4d06875de14a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. 
*/ - i915_gem_evict_vm(vm, NULL); + i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg) /* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, NULL); + err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", @@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg) for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, &ww); + err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } From 6e6d577cd90b27a98ce9f06ed96bca7b59d210f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Dec 2022 12:28:42 +0000 Subject: [PATCH 202/207] drm/i915/migrate: Account for the reserved_space commit 31a2e6cbe8a4eb0d1650fff4b77872b744e14a62 upstream. If the ring is nearly full when calling into emit_pte(), we might incorrectly trample the reserved_space when constructing the packet to emit the PTEs. This then triggers the GEM_BUG_ON(rq->reserved_space > ring->space) when later submitting the request, since the request itself doesn't have enough space left in the ring to emit things like workarounds, breadcrumbs etc. 
v2: Fix the whitespace errors Testcase: igt@i915_selftests@live_emit_pte_full_ring Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7535 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6889 Fixes: cf586021642d ("drm/i915/gt: Pipelined page migration") Signed-off-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Andrzej Hajda Cc: Andi Shyti Cc: Nirmoy Das Cc: stable@vger.kernel.org # v5.15+ Tested-by: Nirmoy Das Reviewed-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221202122844.428006-1-matthew.auld@intel.com (cherry picked from commit 35168a6c4ed53db4f786858bac23b1474fd7d0dc) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_migrate.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index aaaf1906026c..ee072c7d62eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -341,6 +341,16 @@ static int emit_no_arbitration(struct i915_request *rq) return 0; } +static int max_pte_pkt_size(struct i915_request *rq, int pkt) +{ + struct intel_ring *ring = rq->ring; + + pkt = min_t(int, pkt, (ring->space - rq->reserved_space) / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + return pkt; +} + static int emit_pte(struct i915_request *rq, struct sgt_dma *it, enum i915_cache_level cache_level, @@ -387,8 +397,7 @@ static int emit_pte(struct i915_request *rq, return PTR_ERR(cs); /* Pack as many PTE updates as possible into a single MI command */ - pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_length); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ @@ -421,8 +430,7 @@ static int emit_pte(struct i915_request *rq, } } -
pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5); - pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + pkt = max_pte_pkt_size(rq, dword_rem); hdr = cs; *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); From 1cfd678e2f070fd2f005dc6e12c180ff1a1b5843 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 9 Dec 2022 16:05:12 +0800 Subject: [PATCH 203/207] drm/amd/pm: add missing SMU13.0.0 mm_dpm feature mapping commit 592cd24a08763975c75be850a7d4e461bfd353bf upstream. Without this, the pp_dpm_vclk and pp_dpm_dclk outputs do not report correct data. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index bf745a7e67d3..7dc6a2069723 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -187,6 +187,8 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, }; static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { From 789c4804d7aed7094845da4a0ebf7099a503f8ba Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 9 Dec 2022 16:09:58 +0800 Subject: [PATCH 204/207] drm/amd/pm: add missing SMU13.0.7 mm_dpm feature mapping commit e0607c10ebf551a654c3577fc74b4bf5533e1cea upstream. Without this, the pp_dpm_vclk and pp_dpm_dclk outputs do not report correct data.
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 39deb06a86ba..88b37adb1f14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -189,6 +189,8 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, }; static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { From 0b865bcd7a084e9cbf171ad3b240bf40058fd985 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 15 Dec 2022 13:38:46 +0800 Subject: [PATCH 205/207] drm/amd/pm: bump SMU13.0.0 driver_if header to version 0x34 commit 272b981416f8be0180c4d8066f90635fa7c1c501 upstream. This matches the latest PMFW and suppresses the warning emitted on driver loading.
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index b76f0f7e4299..d6b964cf73bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -522,9 +522,9 @@ typedef enum { TEMP_HOTSPOT_M, TEMP_MEM, TEMP_VR_GFX, + TEMP_VR_SOC, TEMP_VR_MEM0, TEMP_VR_MEM1, - TEMP_VR_SOC, TEMP_VR_U, TEMP_LIQUID0, TEMP_LIQUID1, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 865d6358918d..a9122b3b1532 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,6 +28,7 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 89f0f6eb19f3..8e4830a311bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -289,6 +289,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE; break; case IP_VERSION(13, 0, 0): + smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0; + break; case 
IP_VERSION(13, 0, 10): smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10; break; From 54b6a040f38075711751c61b2300a8ce7cb1741f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 16 Dec 2022 17:04:24 +0800 Subject: [PATCH 206/207] drm/amd/pm: correct the fan speed retrieving in PWM for some SMU13 asics commit e73fc71e8f015d61f3adca7659cb209fd5117aa5 upstream. For SMU 13.0.0 and 13.0.7, the output from PMFW is in percent. The driver needs to convert that into the correct PWM (255-based) value. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 17 ++++++++++++++--- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7dc6a2069723..b8430601304f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1417,12 +1417,23 @@ out: static int smu_v13_0_0_get_fan_speed_pwm(struct smu_context *smu, uint32_t *speed) { + int ret; + if (!speed) return -EINVAL; - return smu_v13_0_0_get_smu_metrics_data(smu, - METRICS_CURR_FANPWM, - speed); + ret = smu_v13_0_0_get_smu_metrics_data(smu, + METRICS_CURR_FANPWM, + speed); + if (ret) { + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); + return ret; + } + + /* Convert the PMFW output which is in percent to pwm(255) based */ + *speed = MIN(*speed * 255 / 100, 255); + + return 0; } static int smu_v13_0_0_get_fan_speed_rpm(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 88b37adb1f14..222924363a68 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@
-1361,12 +1361,23 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) static int smu_v13_0_7_get_fan_speed_pwm(struct smu_context *smu, uint32_t *speed) { + int ret; + if (!speed) return -EINVAL; - return smu_v13_0_7_get_smu_metrics_data(smu, - METRICS_CURR_FANPWM, - speed); + ret = smu_v13_0_7_get_smu_metrics_data(smu, + METRICS_CURR_FANPWM, + speed); + if (ret) { + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); + return ret; + } + + /* Convert the PMFW output which is in percent to pwm(255) based */ + *speed = MIN(*speed * 255 / 100, 255); + + return 0; } static int smu_v13_0_7_get_fan_speed_rpm(struct smu_context *smu, From 2cb8e624295ffa0c4d659fcec7d9e7a6c48de156 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 7 Jan 2023 11:12:04 +0100 Subject: [PATCH 207/207] Linux 6.1.4 Link: https://lore.kernel.org/r/20230104160511.905925875@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Allen Pais Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Jon Hunter Tested-by: Bagas Sanjaya Tested-by: Fenil Jain Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Ron Economos Tested-by: Justin M. Forbes Tested-by: Salvatore Bonaccorso Tested-by: Rudi Heitbaum Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a69d14983a48..56afd1509c74 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth