From de1f0ab13915ddc55519b47b6daba6cc2fff7908 Mon Sep 17 00:00:00 2001 From: Mikhail Lobanov Date: Tue, 10 Sep 2024 04:36:20 -0400 Subject: [PATCH 001/207] iio: accel: bma400: Fix uninitialized variable field_value in tap event handling. [ Upstream commit db9795a43dc944f048a37b65e06707f60f713e34 ] In the current implementation, the local variable field_value is used without prior initialization, which may lead to reading uninitialized memory. Specifically, in the macro set_mask_bits, the initial (potentially uninitialized) value of the buffer is copied into old__, and a mask is applied to calculate new__. A similar issue was resolved in commit 6ee2a7058fea ("iio: accel: bma400: Fix smatch warning based on use of unintialized value."). Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 961db2da159d ("iio: accel: bma400: Add support for single and double tap events") Signed-off-by: Mikhail Lobanov Link: https://patch.msgid.link/20240910083624.27224-1-m.lobanov@rosalinux.ru Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/bma400_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index e90e2f01550a..04083b7395ab 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -1219,7 +1219,8 @@ static int bma400_activity_event_en(struct bma400_data *data, static int bma400_tap_event_en(struct bma400_data *data, enum iio_event_direction dir, int state) { - unsigned int mask, field_value; + unsigned int mask; + unsigned int field_value = 0; int ret; /* From 0fca5ed4be8e8bfbfb9bd97845af596bab7192d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 20 Sep 2024 14:56:24 +0200 Subject: [PATCH 002/207] bpf: Make sure internal and UAPI bpf_redirect flags don't overlap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 09d88791c7cd888d5195c84733caf9183dcfbd16 ] The bpf_redirect_info is shared between the SKB and XDP redirect paths, and the two paths use the same numeric flag values in the ri->flags field (specifically, BPF_F_BROADCAST == BPF_F_NEXTHOP). This means that if skb bpf_redirect_neigh() is used with a non-NULL params argument and, subsequently, an XDP redirect is performed using the same bpf_redirect_info struct, the XDP path will get confused and end up crashing, which syzbot managed to trigger. With the stack-allocated bpf_redirect_info, the structure is no longer shared between the SKB and XDP paths, so the crash doesn't happen anymore. However, different code paths using identically-numbered flag values in the same struct field still seems like a bit of a mess, so this patch cleans that up by moving the flag definitions together and redefining the three flags in BPF_F_REDIRECT_INTERNAL to not overlap with the flags used for XDP. It also adds a BUILD_BUG_ON() check to make sure the overlap is not re-introduced by mistake. Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support") Reported-by: syzbot+cca39e6e84a367a7e6f6@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Closes: https://syzkaller.appspot.com/bug?extid=cca39e6e84a367a7e6f6 Link: https://lore.kernel.org/bpf/20240920125625.59465-1-toke@redhat.com Signed-off-by: Sasha Levin --- include/uapi/linux/bpf.h | 13 +++++-------- net/core/filter.c | 8 +++++--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ba6e346c8d66..4bb38409b26a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5921,11 +5921,6 @@ enum { BPF_F_MARK_ENFORCE = (1ULL << 6), }; -/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -enum { - BPF_F_INGRESS = (1ULL << 0), -}; - /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ enum { BPF_F_TUNINFO_IPV6 = (1ULL << 0), @@ -6072,10 +6067,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; -/* Flags for bpf_redirect_map helper */ +/* Flags for bpf_redirect and bpf_redirect_map helpers */ enum { - BPF_F_BROADCAST = (1ULL << 3), - BPF_F_EXCLUDE_INGRESS = (1ULL << 4), + BPF_F_INGRESS = (1ULL << 0), /* used for skb path */ + BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */ + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */ +#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) }; #define __bpf_md_ptr(type, name) \ diff --git a/net/core/filter.c b/net/core/filter.c index 8bfd46a070c1..bbb143248843 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2423,9 +2423,9 @@ out: /* Internal, non-exposed redirect flags. */ enum { - BPF_F_NEIGH = (1ULL << 1), - BPF_F_PEER = (1ULL << 2), - BPF_F_NEXTHOP = (1ULL << 3), + BPF_F_NEIGH = (1ULL << 16), + BPF_F_PEER = (1ULL << 17), + BPF_F_NEXTHOP = (1ULL << 18), #define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) }; @@ -2435,6 +2435,8 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) struct sk_buff *clone; int ret; + BUILD_BUG_ON(BPF_F_REDIRECT_INTERNAL & BPF_F_REDIRECT_FLAGS); + if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) return -EINVAL; From 49454f09936a9a96edfb047156889879cb4001eb Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 11 Sep 2024 10:41:18 +0200 Subject: [PATCH 003/207] bpf: devmap: provide rxq after redirect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ca9984c5f0ab3690d98b13937b2485a978c8dd73 ] rxq contains a pointer to the device from where the redirect happened. Currently, the BPF program that was executed after a redirect via BPF_MAP_TYPE_DEVMAP* does not have it set. This is particularly bad since accessing ingress_ifindex, e.g. SEC("xdp") int prog(struct xdp_md *pkt) { return bpf_redirect_map(&dev_redirect_map, 0, 0); } SEC("xdp/devmap") int prog_after_redirect(struct xdp_md *pkt) { bpf_printk("ifindex %i", pkt->ingress_ifindex); return XDP_PASS; } depends on access to rxq, so a NULL pointer gets dereferenced: <1>[ 574.475170] BUG: kernel NULL pointer dereference, address: 0000000000000000 <1>[ 574.475188] #PF: supervisor read access in kernel mode <1>[ 574.475194] #PF: error_code(0x0000) - not-present page <6>[ 574.475199] PGD 0 P4D 0 <4>[ 574.475207] Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI <4>[ 574.475217] CPU: 4 UID: 0 PID: 217 Comm: kworker/4:1 Not tainted 6.11.0-rc5-reduced-00859-g780801200300 #23 <4>[ 574.475226] Hardware name: Intel(R) Client Systems NUC13ANHi7/NUC13ANBi7, BIOS ANRPL357.0026.2023.0314.1458 03/14/2023 <4>[ 574.475231] Workqueue: mld mld_ifc_work <4>[ 574.475247] RIP: 0010:bpf_prog_5e13354d9cf5018a_prog_after_redirect+0x17/0x3c <4>[ 574.475257] Code: cc cc cc cc cc cc cc 80 00 00 00 cc cc cc cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 66 90 55 48 89 e5 f3 0f 1e fa 48 8b 57 20 <48> 8b 52 00 8b 92 e0 00 00 00 48 bf f8 a6 d5 c4 5d a0 ff ff be 0b <4>[ 574.475263] RSP: 0018:ffffa62440280c98 EFLAGS: 00010206 <4>[ 574.475269] RAX: ffffa62440280cd8 RBX: 0000000000000001 RCX: 0000000000000000 <4>[ 574.475274] RDX: 0000000000000000 RSI: ffffa62440549048 RDI: ffffa62440280ce0 <4>[ 574.475278] RBP: ffffa62440280c98 R08: 0000000000000002 R09: 0000000000000001 <4>[ 574.475281] R10: ffffa05dc8b98000 R11: ffffa05f577fca40 R12: ffffa05dcab24000 <4>[ 574.475285] R13: ffffa62440280ce0 R14: ffffa62440549048 R15: ffffa62440549000 <4>[ 574.475289] FS: 0000000000000000(0000) GS:ffffa05f4f700000(0000) knlGS:0000000000000000 <4>[ 574.475294] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 574.475298] CR2: 0000000000000000 CR3: 000000025522e000 CR4: 0000000000f50ef0 <4>[ 574.475303] PKRU: 55555554 <4>[ 574.475306] Call Trace: <4>[ 574.475313] <4>[ 574.475318] ? __die+0x23/0x70 <4>[ 574.475329] ? page_fault_oops+0x180/0x4c0 <4>[ 574.475339] ? skb_pp_cow_data+0x34c/0x490 <4>[ 574.475346] ? kmem_cache_free+0x257/0x280 <4>[ 574.475357] ? exc_page_fault+0x67/0x150 <4>[ 574.475368] ? asm_exc_page_fault+0x26/0x30 <4>[ 574.475381] ? bpf_prog_5e13354d9cf5018a_prog_after_redirect+0x17/0x3c <4>[ 574.475386] bq_xmit_all+0x158/0x420 <4>[ 574.475397] __dev_flush+0x30/0x90 <4>[ 574.475407] veth_poll+0x216/0x250 [veth] <4>[ 574.475421] __napi_poll+0x28/0x1c0 <4>[ 574.475430] net_rx_action+0x32d/0x3a0 <4>[ 574.475441] handle_softirqs+0xcb/0x2c0 <4>[ 574.475451] do_softirq+0x40/0x60 <4>[ 574.475458] <4>[ 574.475461] <4>[ 574.475464] __local_bh_enable_ip+0x66/0x70 <4>[ 574.475471] __dev_queue_xmit+0x268/0xe40 <4>[ 574.475480] ? selinux_ip_postroute+0x213/0x420 <4>[ 574.475491] ? alloc_skb_with_frags+0x4a/0x1d0 <4>[ 574.475502] ip6_finish_output2+0x2be/0x640 <4>[ 574.475512] ? nf_hook_slow+0x42/0xf0 <4>[ 574.475521] ip6_finish_output+0x194/0x300 <4>[ 574.475529] ? __pfx_ip6_finish_output+0x10/0x10 <4>[ 574.475538] mld_sendpack+0x17c/0x240 <4>[ 574.475548] mld_ifc_work+0x192/0x410 <4>[ 574.475557] process_one_work+0x15d/0x380 <4>[ 574.475566] worker_thread+0x29d/0x3a0 <4>[ 574.475573] ? __pfx_worker_thread+0x10/0x10 <4>[ 574.475580] ? __pfx_worker_thread+0x10/0x10 <4>[ 574.475587] kthread+0xcd/0x100 <4>[ 574.475597] ? __pfx_kthread+0x10/0x10 <4>[ 574.475606] ret_from_fork+0x31/0x50 <4>[ 574.475615] ? __pfx_kthread+0x10/0x10 <4>[ 574.475623] ret_from_fork_asm+0x1a/0x30 <4>[ 574.475635] <4>[ 574.475637] Modules linked in: veth br_netfilter bridge stp llc iwlmvm x86_pkg_temp_thermal iwlwifi efivarfs nvme nvme_core <4>[ 574.475662] CR2: 0000000000000000 <4>[ 574.475668] ---[ end trace 0000000000000000 ]--- Therefore, provide it to the program by setting rxq properly. Fixes: cb261b594b41 ("bpf: Run devmap xdp_prog on flush instead of bulk enqueue") Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Florian Kauer Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240911-devel-koalo-fix-ingress-ifindex-v4-1-5c643ae10258@linutronix.de Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- kernel/bpf/devmap.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 69e78dc4bb18..96b0345f76c2 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -322,9 +322,11 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, struct xdp_frame **frames, int n, - struct net_device *dev) + struct net_device *tx_dev, + struct net_device *rx_dev) { - struct xdp_txq_info txq = { .dev = dev }; + struct xdp_txq_info txq = { .dev = tx_dev }; + struct xdp_rxq_info rxq = { .dev = rx_dev }; struct xdp_buff xdp; int i, nframes = 0; @@ -335,6 +337,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, xdp_convert_frame_to_buff(xdpf, &xdp); xdp.txq = &txq; + xdp.rxq = &rxq; act = bpf_prog_run_xdp(xdp_prog, &xdp); switch (act) { @@ -349,7 +352,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, bpf_warn_invalid_xdp_action(NULL, xdp_prog, act); fallthrough; case XDP_ABORTED: - trace_xdp_exception(dev, xdp_prog, act); + trace_xdp_exception(tx_dev, xdp_prog, act); fallthrough; case XDP_DROP: xdp_return_frame_rx_napi(xdpf); @@ -377,7 +380,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) } if (bq->xdp_prog) { - to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev); + to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev, bq->dev_rx); if (!to_send) goto out; } From 8552150e5d804cebec436b86288dd2235b136156 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Fri, 4 Oct 2024 12:23:04 +0000 Subject: [PATCH 004/207] cpufreq/amd-pstate: Fix amd_pstate mode switch on shared memory systems [ Upstream commit c10e50a469b5ec91eabf653526a22bdce03a9bca ] While switching the driver mode between active and passive, Collaborative Processor Performance Control (CPPC) is disabled in amd_pstate_unregister_driver(). But, it is not enabled back while registering the new driver (passive or active). This leads to the new driver mode not working correctly, so enable it back in amd_pstate_register_driver(). Fixes: 3ca7bc818d8c ("cpufreq: amd-pstate: Add guided mode control support via sysfs") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241004122303.94283-1-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Sasha Levin --- drivers/cpufreq/amd-pstate.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index f461f99eb040..8c16d67b98bf 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1061,11 +1061,21 @@ static int amd_pstate_register_driver(int mode) return -EINVAL; cppc_state = mode; + + ret = amd_pstate_enable(true); + if (ret) { + pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", + ret); + amd_pstate_driver_cleanup(); + return ret; + } + ret = cpufreq_register_driver(current_pstate_driver); if (ret) { amd_pstate_driver_cleanup(); return ret; } + return 0; } From 978d1f63fc6e1c6310f8629fe3ba23634b64985e Mon Sep 17 00:00:00 2001 From: Timo Grautstueck Date: Sun, 6 Oct 2024 16:02:44 +0200 Subject: [PATCH 005/207] lib/Kconfig.debug: fix grammar in RUST_BUILD_ASSERT_ALLOW [ Upstream commit ab8851431bef5cc44f0f3f0da112e883fd4d0df5 ] Just a grammar fix in lib/Kconfig.debug, under the config option RUST_BUILD_ASSERT_ALLOW. Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1006 Fixes: ecaa6ddff2fd ("rust: add `build_error` crate") Signed-off-by: Timo Grautstueck Link: https://lore.kernel.org/r/20241006140244.5509-1-timo.grautstueck@web.de Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index da5513cfc125..f94c3e957b82 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2999,7 +2999,7 @@ config RUST_BUILD_ASSERT_ALLOW bool "Allow unoptimized build-time assertions" depends on RUST help - Controls how are `build_error!` and `build_assert!` handled during build. + Controls how `build_error!` and `build_assert!` are handled during the build. If calls to them exist in the binary, it may indicate a violated invariant or that the optimizer failed to verify the invariant during compilation. From 90ad4e2fe3a243ff87ca535715ba6953bb2947b6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2024 18:09:58 +0200 Subject: [PATCH 006/207] bpf: Fix memory leak in bpf_core_apply [ Upstream commit 45126b155e3b5201179cdc038504bf93a8ccd921 ] We need to free specs properly. Fixes: 3d2786d65aaa ("bpf: correctly handle malformed BPF_CORE_TYPE_ID_LOCAL relos") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20241007160958.607434-1-jolsa@kernel.org Signed-off-by: Sasha Levin --- kernel/bpf/btf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e0e4d4f490e8..c8828016a66f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8435,6 +8435,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, if (!type) { bpf_log(ctx->log, "relo #%u: bad type id %u\n", relo_idx, relo->type_id); + kfree(specs); return -EINVAL; } From 73e04a6114e08b5eb10e589e12b680955accb376 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 18 Sep 2024 20:05:56 -0700 Subject: [PATCH 007/207] RDMA/bnxt_re: Fix a possible memory leak [ Upstream commit 3fc5410f225d1651580a4aeb7c72f55e28673b53 ] In bnxt_re_setup_chip_ctx() when bnxt_qplib_map_db_bar() fails driver is not freeing the memory allocated for "rdev->chip_ctx". Fixes: 0ac20faf5d83 ("RDMA/bnxt_re: Reorg the bar mapping") Link: https://patch.msgid.link/r/1726715161-18941-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 039801d93ed8..c173d0ffc629 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -176,8 +176,11 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); - if (rc) + if (rc) { + kfree(rdev->chip_ctx); + rdev->chip_ctx = NULL; return rc; + } if (bnxt_qplib_determine_atomics(en_dev->pdev)) ibdev_info(&rdev->ibdev, From 7d10555a340a7432a383b1f8dca5fa7a19aac2bd Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Wed, 18 Sep 2024 20:05:57 -0700 Subject: [PATCH 008/207] RDMA/bnxt_re: Fix incorrect AVID type in WQE structure [ Upstream commit 9ab20f76ae9fad55ebaf36bdff04aea1c2552374 ] Driver uses internal data structure to construct WQE frame. It used avid type as u16 which can accommodate up to 64K AVs. When outstanding AVID crosses 64K, driver truncates AVID and hence it uses incorrect AVID to WR. This leads to WR failure due to invalid AV ID and QP is moved to error state with reason set to 19 (INVALID AVID). When RDMA CM path is used, this issue hits QP1 and it is moved to error state Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://patch.msgid.link/r/1726715161-18941-3-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Selvin Xavier Reviewed-by: Chandramohan Akula Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index a6f38d8f12ef..56ddff96b508 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -169,7 +169,7 @@ struct bnxt_qplib_swqe { }; u32 q_key; u32 dst_qp; - u16 avid; + u32 avid; } send; /* Send Raw Ethernet and QP1 */ From c71957271f2e8133a6aa82001c2fa671d5008129 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 18 Sep 2024 20:05:58 -0700 Subject: [PATCH 009/207] RDMA/bnxt_re: Add a check for memory allocation [ Upstream commit c5c1ae73b7741fa3b58e6e001b407825bb971225 ] __alloc_pbl() can return error when memory allocation fails. Driver is not checking the status on one of the instances. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Link: https://patch.msgid.link/r/1726715161-18941-4-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index ae2bde34e785..47406ab8879c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -244,6 +244,8 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, sginfo.pgsize = npde * pg_size; sginfo.npages = 1; rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo); + if (rc) + goto fail; /* Alloc PBL pages */ sginfo.npages = npbl; From b7bf17f4f92fbdd0a028374058de8eaff6b2c88a Mon Sep 17 00:00:00 2001 From: Anumula Murali Mohan Reddy Date: Tue, 8 Oct 2024 17:13:34 +0530 Subject: [PATCH 010/207] RDMA/core: Fix ENODEV error for iWARP test over vlan [ Upstream commit 5069d7e202f640a36cf213a432296c85113a52f7 ] If traffic is over vlan, cma_validate_port() fails to match vlan net_device ifindex with bound_if_index and results in ENODEV error. It is because rdma_copy_src_l2_addr() always assigns bound_if_index with real net_device ifindex. This patch fixes the issue by assigning bound_if_index with vlan net_device index if traffic is over vlan. Fixes: f8ef1be816bf ("RDMA/cma: Avoid GID lookups on iWARP devices") Signed-off-by: Anumula Murali Mohan Reddy Signed-off-by: Potnuri Bharat Teja Link: https://patch.msgid.link/20241008114334.146702-1-anumula@chelsio.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/addr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f253295795f0..fd78d678877c 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -269,6 +269,8 @@ rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) break; #endif } + if (!ret && dev && is_vlan_dev(dev)) + dev = vlan_dev_real_dev(dev); return ret ? ERR_PTR(ret) : dev; } From 9e3d07e503ea4a5c60ee386bca4a5e6c5a74308a Mon Sep 17 00:00:00 2001 From: Martin Kletzander Date: Tue, 1 Oct 2024 13:43:56 +0200 Subject: [PATCH 011/207] x86/resctrl: Avoid overflow in MB settings in bw_validate() [ Upstream commit 2b5648416e47933939dc310c4ea1e29404f35630 ] The resctrl schemata file supports specifying memory bandwidth associated with the Memory Bandwidth Allocation (MBA) feature via a percentage (this is the default) or bandwidth in MiBps (when resctrl is mounted with the "mba_MBps" option). The allowed range for the bandwidth percentage is from /sys/fs/resctrl/info/MB/min_bandwidth to 100, using a granularity of /sys/fs/resctrl/info/MB/bandwidth_gran. The supported range for the MiBps bandwidth is 0 to U32_MAX. There are two issues with parsing of MiBps memory bandwidth: * The user provided MiBps is mistakenly rounded up to the granularity that is unique to percentage input. * The user provided MiBps is parsed using unsigned long (thus accepting values up to ULONG_MAX), and then assigned to u32 that could result in overflow. Do not round up the MiBps value and parse user provided bandwidth as the u32 it is intended to be. Use the appropriate kstrtou32() that can detect out of range values. Fixes: 8205a078ba78 ("x86/intel_rdt/mba_sc: Add schemata support") Fixes: 6ce1560d35f6 ("x86/resctrl: Switch over to the resctrl mbps_val list") Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Signed-off-by: Martin Kletzander Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Tony Luck Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b44c487727d4..a701e7921ea5 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -27,10 +27,10 @@ * hardware. The allocated bandwidth percentage is rounded to the next * control step available on the hardware. */ -static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) +static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) { - unsigned long bw; int ret; + u32 bw; /* * Only linear delay values is supported for current Intel SKUs. @@ -40,16 +40,21 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return false; } - ret = kstrtoul(buf, 10, &bw); + ret = kstrtou32(buf, 10, &bw); if (ret) { - rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); + rdt_last_cmd_printf("Invalid MB value %s\n", buf); return false; } - if ((bw < r->membw.min_bw || bw > r->default_ctrl) && - !is_mba_sc(r)) { - rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, - r->membw.min_bw, r->default_ctrl); + /* Nothing else to do if software controller is enabled. */ + if (is_mba_sc(r)) { + *data = bw; + return true; + } + + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", + bw, r->membw.min_bw, r->default_ctrl); return false; } @@ -63,7 +68,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; struct rdt_resource *r = s->res; - unsigned long bw_val; + u32 bw_val; cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { From f4381ccf1ed1938e1d551cc1ea8b526292873230 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 16 Jul 2024 02:03:11 +0300 Subject: [PATCH 012/207] ARM: dts: bcm2837-rpi-cm3-io3: Fix HDMI hpd-gpio pin [ Upstream commit dc7785e4723510616d776862ddb4c08857a1bdb2 ] HDMI_HPD_N_1V8 is connected to GPIO pin 0, not 1. This fixes HDMI hotplug/output detection. See https://datasheets.raspberrypi.com/cm/cm3-schematics.pdf Signed-off-by: Florian Klink Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20240715230311.685641-1-flokli@flokli.de Reviewed-by: Stefan Wahren Fixes: a54fe8a6cf66 ("ARM: dts: add Raspberry Pi Compute Module 3 and IO board") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts index 72d26d130efa..85f54fa595aa 100644 --- a/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/broadcom/bcm2837-rpi-cm3-io3.dts @@ -77,7 +77,7 @@ }; &hdmi { - hpd-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; + hpd-gpios = <&expgpio 0 GPIO_ACTIVE_LOW>; power-domains = <&power RPI_POWER_DOMAIN_HDMI>; status = "okay"; }; From 99bf10e92a3f9faf75275a9c6382bd14680290fb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:39 +0200 Subject: [PATCH 013/207] bpf: Add missed value to kprobe perf link info [ Upstream commit 3acf8ace68230e9558cf916847f1cc9f208abdf1 ] Add missed value to kprobe attached through perf link info to hold the stats of missed kprobe handler execution. The kprobe's missed counter gets incremented when kprobe handler is not executed due to another kprobe running on the same cpu. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230920213145.1941596-4-jolsa@kernel.org Stable-dep-of: 4deecdd29cf2 ("bpf: fix unpopulated name_len field in perf_event link info") Signed-off-by: Sasha Levin --- include/linux/trace_events.h | 6 ++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 14 ++++++++------ kernel/trace/bpf_trace.c | 5 +++-- kernel/trace/trace_kprobe.c | 14 +++++++++++--- tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index cb8bd759e800..9d799777c333 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -765,7 +765,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -805,7 +806,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -880,6 +881,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4bb38409b26a..6ea588d1ae14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6562,6 +6562,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b1933d074f05..9c76f21f187f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3444,7 +3444,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) static int bpf_perf_link_fill_common(const struct perf_event *event, char __user *uname, u32 ulen, u64 *probe_offset, u64 *probe_addr, - u32 *fd_type) + u32 *fd_type, unsigned long *missed) { const char *buf; u32 prog_id; @@ -3455,7 +3455,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, return -EINVAL; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, - probe_offset, probe_addr); + probe_offset, probe_addr, missed); if (err) return err; if (!uname) @@ -3478,6 +3478,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, static int bpf_perf_link_fill_kprobe(const struct perf_event *event, struct bpf_link_info *info) { + unsigned long missed; char __user *uname; u64 addr, offset; u32 ulen, type; @@ -3486,7 +3487,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, &missed); if (err) return err; if (type == BPF_FD_TYPE_KRETPROBE) @@ -3495,6 +3496,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KPROBE; info->perf_event.kprobe.offset = offset; + info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; @@ -3514,7 +3516,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, NULL); if (err) return err; @@ -3550,7 +3552,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL); + return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, @@ -4897,7 +4899,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, - &probe_addr); + &probe_addr, NULL); if (!err) err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index eca858bde804..bbdc4199748b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2389,7 +2389,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr) + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed) { bool is_tracepoint, is_syscall_tp; struct bpf_prog *prog; @@ -2424,7 +2425,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_KPROBE_EVENTS if (flags & TRACE_EVENT_FL_KPROBE) err = bpf_get_kprobe_info(event, fd_type, buf, - probe_offset, probe_addr, + probe_offset, probe_addr, missed, event->attr.type == PERF_TYPE_TRACEPOINT); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 47812aa16bb5..52f8b537dd0a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1249,6 +1249,12 @@ static const struct file_operations kprobe_events_ops = { .write = probes_write, }; +static unsigned long trace_kprobe_missed(struct trace_kprobe *tk) +{ + return trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; +} + /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { @@ -1260,8 +1266,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) return 0; tk = to_trace_kprobe(ev); - nmissed = trace_kprobe_is_return(tk) ? - tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; + nmissed = trace_kprobe_missed(tk); seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), @@ -1607,7 +1612,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, - u64 *probe_addr, bool perf_type_tracepoint) + u64 *probe_addr, unsigned long *missed, + bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1626,6 +1632,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *probe_addr = kallsyms_show_value(current_cred()) ? (unsigned long)tk->rp.kp.addr : 0; *symbol = tk->symbol; + if (missed) + *missed = trace_kprobe_missed(tk); return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ba6e346c8d66..acda713f8b4d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6565,6 +6565,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ From cfd63c3a45388d628b3cdd57d79b03f004cdf4d8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:04:58 +0100 Subject: [PATCH 014/207] bpf: Add cookie to perf_event bpf_link_info records [ Upstream commit d5c16492c66fbfca85f36e42363d32212df5927b ] At the moment we don't store cookie for perf_event probes, while we do that for the rest of the probes. Adding cookie fields to struct bpf_link_info perf event probe records: perf_event.uprobe perf_event.kprobe perf_event.tracepoint perf_event.perf_event And the code to store that in bpf_link_info struct. Signed-off-by: Jiri Olsa Acked-by: Song Liu Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20240119110505.400573-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Stable-dep-of: 4deecdd29cf2 ("bpf: fix unpopulated name_len field in perf_event link info") Signed-off-by: Sasha Levin --- include/uapi/linux/bpf.h | 6 ++++++ kernel/bpf/syscall.c | 4 ++++ tools/include/uapi/linux/bpf.h | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6ea588d1ae14..431bc700bcfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6556,6 +6556,7 @@ struct bpf_link_info { __aligned_u64 file_name; /* in/out */ __u32 name_len; __u32 offset; /* offset from file_name */ + __u64 cookie; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6563,14 +6564,19 @@ struct bpf_link_info { __u32 offset; /* offset from func_name */ __u64 addr; __u64 missed; + __u64 cookie; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ __u32 name_len; + __u32 :32; + __u64 cookie; } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ struct { __u64 config; __u32 type; + __u32 :32; + __u64 cookie; } event; /* BPF_PERF_EVENT_EVENT */ }; } perf_event; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9c76f21f187f..1d04d098f57d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3500,6 +3500,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; + info->perf_event.kprobe.cookie = event->bpf_cookie; return 0; } #endif @@ -3525,6 +3526,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, else info->perf_event.type = BPF_PERF_EVENT_UPROBE; info->perf_event.uprobe.offset = offset; + info->perf_event.uprobe.cookie = event->bpf_cookie; return 0; } #endif @@ -3552,6 +3554,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; + info->perf_event.tracepoint.cookie = event->bpf_cookie; return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } @@ -3560,6 +3563,7 @@ static int bpf_perf_link_fill_perf_event(const struct perf_event *event, { info->perf_event.event.type = event->attr.type; info->perf_event.event.config = event->attr.config; + info->perf_event.event.cookie = event->bpf_cookie; info->perf_event.type = BPF_PERF_EVENT_EVENT; return 0; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index acda713f8b4d..977ec094bc2a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6559,6 +6559,7 @@ struct bpf_link_info { __aligned_u64 file_name; /* in/out */ __u32 name_len; __u32 offset; /* offset from file_name */ + __u64 cookie; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6566,14 +6567,19 @@ struct bpf_link_info { __u32 offset; /* offset from func_name */ __u64 addr; __u64 missed; + __u64 cookie; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ __u32 name_len; + __u32 :32; + __u64 cookie; } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ struct { __u64 config; __u32 type; + __u32 :32; + __u64 cookie; } event; /* BPF_PERF_EVENT_EVENT */ }; } perf_event; From e5c2b971db09b0669772c13479f9fbc1c58ead68 Mon Sep 17 00:00:00 2001 From: Tyrone Wu Date: Tue, 8 Oct 2024 16:43:11 +0000 Subject: [PATCH 015/207] bpf: fix unpopulated name_len field in perf_event link info [ Upstream commit 4deecdd29cf29844c7bd164d72dc38d2e672f64e ] Previously when retrieving `bpf_link_info.perf_event` for kprobe/uprobe/tracepoint, the `name_len` field was not populated by the kernel, leaving it to reflect the value initially set by the user. This behavior was inconsistent with how other input/output string buffer fields function (e.g. `raw_tracepoint.tp_name_len`). This patch fills `name_len` with the actual size of the string name. Fixes: 1b715e1b0ec5 ("bpf: Support ->fill_link_info for perf_event") Signed-off-by: Tyrone Wu Acked-by: Jiri Olsa Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20241008164312.46269-1-wudevelops@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/syscall.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1d04d098f57d..b43302c80cac 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3442,15 +3442,16 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) } static int bpf_perf_link_fill_common(const struct perf_event *event, - char __user *uname, u32 ulen, + char __user *uname, u32 *ulenp, u64 *probe_offset, u64 *probe_addr, u32 *fd_type, unsigned long *missed) { const char *buf; - u32 prog_id; + u32 prog_id, ulen; size_t len; int err; + ulen = *ulenp; if (!ulen ^ !uname) return -EINVAL; @@ -3458,10 +3459,17 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, probe_offset, probe_addr, missed); if (err) return err; - if (!uname) - return 0; + if (buf) { len = strlen(buf); + *ulenp = len + 1; + } else { + *ulenp = 1; + } + if (!uname) + return 0; + + if (buf) { err = bpf_copy_to_user(uname, buf, ulen, len); if (err) return err; @@ -3486,7 +3494,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; - err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, + err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, &type, &missed); if (err) return err; @@ -3494,7 +3502,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; else info->perf_event.type = BPF_PERF_EVENT_KPROBE; - + info->perf_event.kprobe.name_len = ulen; info->perf_event.kprobe.offset = offset; info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) @@ -3516,7 +3524,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; - err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, + err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, &type, NULL); if (err) return err; @@ -3525,6 +3533,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_URETPROBE; else info->perf_event.type = BPF_PERF_EVENT_UPROBE; + info->perf_event.uprobe.name_len = ulen; info->perf_event.uprobe.offset = offset; info->perf_event.uprobe.cookie = event->bpf_cookie; return 0; @@ -3550,12 +3559,18 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, { char __user *uname; u32 ulen; + int err; uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; + err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); + if (err) + return err; + info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; + info->perf_event.tracepoint.name_len = ulen; info->perf_event.tracepoint.cookie = event->bpf_cookie; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); + return 0; } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, From e909258d41b2da5070d3e3de45b16fe576386285 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:28 +0100 Subject: [PATCH 016/207] selftests/bpf: Use bpf_link__destroy in fill_link_info tests [ Upstream commit 1703612885723869064f18e8816c6f3f87987748 ] The fill_link_info test keeps skeleton open and just creates various links. We are wrongly calling bpf_link__detach after each test to close them, we need to call bpf_link__destroy. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Yafang Shao Link: https://lore.kernel.org/bpf/20231125193130.834322-5-jolsa@kernel.org Stable-dep-of: 4538a38f654a ("selftests/bpf: fix perf_event link info name_len assertion") Signed-off-by: Sasha Levin --- .../selftests/bpf/prog_tests/fill_link_info.c | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 9d768e083714..7db7f9bd9d58 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -140,14 +140,14 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, ); ssize_t entry_offset = 0; + struct bpf_link *link; int link_fd, err; - skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, - KPROBE_FUNC, &opts); - if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe")) + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe")) return; - link_fd = bpf_link__fd(skel->links.kprobe_run); + link_fd = bpf_link__fd(link); if (!invalid) { /* See also arch_adjust_kprobe_addr(). */ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) @@ -157,39 +157,41 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, } else { kprobe_fill_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kprobe_run); + bpf_link__destroy(link); } static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + struct bpf_link *link; int link_fd, err; - skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); - if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp")) + link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + if (!ASSERT_OK_PTR(link, "attach_tp")) return; - link_fd = bpf_link__fd(skel->links.tp_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.tp_run); + bpf_link__destroy(link); } static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + struct bpf_link *link; int link_fd, err; - skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); - if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe")) + link = bpf_program__attach_uprobe(skel->progs.uprobe_run, + type == BPF_PERF_EVENT_URETPROBE, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset); + if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; - link_fd = bpf_link__fd(skel->links.uprobe_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.uprobe_run); + bpf_link__destroy(link); } static int verify_kmulti_link_info(int fd, bool retprobe) @@ -278,24 +280,24 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, bool retprobe, bool invalid) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct bpf_link *link; int link_fd, err; opts.syms = kmulti_syms; opts.cnt = KMULTI_CNT; opts.retprobe = retprobe; - skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, - NULL, &opts); - if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi")) + link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) return; - link_fd = bpf_link__fd(skel->links.kmulti_run); + link_fd = bpf_link__fd(link); if (!invalid) { err = verify_kmulti_link_info(link_fd, retprobe); ASSERT_OK(err, "verify_kmulti_link_info"); } else { verify_kmulti_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kmulti_run); + bpf_link__destroy(link); } void test_fill_link_info(void) From 0ce20181976d496176e6f5576ec87081a6a472c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:05:02 +0100 Subject: [PATCH 017/207] selftests/bpf: Add cookies check for perf_event fill_link_info test [ Upstream commit d74179708473c649c653f1db280e29875a532e99 ] Now that we get cookies for perf_event probes, adding tests for cookie for kprobe/uprobe/tracepoint. The perf_event test needs to be added completely and is coming in following change. Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Stable-dep-of: 4538a38f654a ("selftests/bpf: fix perf_event link info name_len assertion") Signed-off-by: Sasha Levin --- .../selftests/bpf/prog_tests/fill_link_info.c | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 7db7f9bd9d58..9eb93258614f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -30,6 +30,8 @@ static noinline void uprobe_func(void) asm volatile (""); } +#define PERF_EVENT_COOKIE 0xdeadbeef + static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, ssize_t offset, ssize_t entry_offset) { @@ -61,6 +63,8 @@ again: ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset, "kprobe_addr"); + ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + if (!info.perf_event.kprobe.func_name) { ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); @@ -80,6 +84,8 @@ again: goto again; } + ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie"); + err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME, strlen(TP_NAME)); ASSERT_EQ(err, 0, "cmp_tp_name"); @@ -95,6 +101,8 @@ again: goto again; } + ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie"); + err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE, strlen(UPROBE_FILE)); ASSERT_EQ(err, 0, "cmp_file_name"); @@ -138,6 +146,7 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, .attach_mode = PROBE_ATTACH_MODE_LINK, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, + .bpf_cookie = PERF_EVENT_COOKIE, ); ssize_t entry_offset = 0; struct bpf_link *link; @@ -162,10 +171,13 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts, + .bpf_cookie = PERF_EVENT_COOKIE, + ); struct bpf_link *link; int link_fd, err; - link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts); if (!ASSERT_OK_PTR(link, "attach_tp")) return; @@ -178,13 +190,17 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel) static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = type == BPF_PERF_EVENT_URETPROBE, + .bpf_cookie = PERF_EVENT_COOKIE, + ); struct bpf_link *link; int link_fd, err; - link = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); + link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset, + &opts); if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; From 6617ae800cbb210d93970ce4102f4c1b4ab34103 Mon Sep 17 00:00:00 2001 From: Tyrone Wu Date: Tue, 8 Oct 2024 16:43:12 +0000 Subject: [PATCH 018/207] selftests/bpf: fix perf_event link info name_len assertion [ Upstream commit 4538a38f654a1c292fe489a9b66179262bfed088 ] Fix `name_len` field assertions in `bpf_link_info.perf_event` for kprobe/uprobe/tracepoint to validate correct name size instead of 0. Fixes: 23cf7aa539dc ("selftests/bpf: Add selftest for fill_link_info") Signed-off-by: Tyrone Wu Acked-by: Jiri Olsa Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20241008164312.46269-2-wudevelops@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 9eb93258614f..5b0c6a04cdbf 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -65,8 +65,9 @@ again: ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1, + "name_len"); if (!info.perf_event.kprobe.func_name) { - ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); info.perf_event.kprobe.name_len = sizeof(buf); goto again; @@ -77,8 +78,9 @@ again: ASSERT_EQ(err, 0, "cmp_kprobe_func_name"); break; case BPF_PERF_EVENT_TRACEPOINT: + ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1, + "name_len"); if (!info.perf_event.tracepoint.tp_name) { - ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len"); info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); info.perf_event.tracepoint.name_len = sizeof(buf); goto again; @@ -94,8 +96,9 @@ again: case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, + "name_len"); if (!info.perf_event.uprobe.file_name) { - ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len"); info.perf_event.uprobe.file_name = ptr_to_u64(&buf); info.perf_event.uprobe.name_len = sizeof(buf); goto again; From b56328660be084d200e946a4141e4321762bb89f Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 11 Apr 2024 14:01:39 +0200 Subject: [PATCH 019/207] s390/pci: Handle PCI error codes other than 0x3a [ Upstream commit 3cd03ea57e8e16cc78cc357d5e9f26078426f236 ] The Linux implementation of PCI error recovery for s390 was based on the understanding that firmware error recovery is a two step process with an optional initial error event to indicate the cause of the error if known followed by either error event 0x3A (Success) or 0x3B (Failure) to indicate whether firmware was able to recover. While this has been the case in testing and the error cases seen in the wild it turns out this is not correct. Instead firmware only generates 0x3A for some error and service scenarios and expects the OS to perform recovery for all PCI events codes except for those indicating permanent error (0x3B, 0x40) and those indicating errors on the function measurement block (0x2A, 0x2B, 0x2C). Align Linux behavior with these expectations. Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery") Reviewed-by: Gerd Bayer Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/pci/pci_event.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b9324ca2eb94..b3961f1016ea 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -272,18 +272,19 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) goto no_pdev; switch (ccdf->pec) { - case 0x003a: /* Service Action or Error Recovery Successful */ + case 0x002a: /* Error event concerns FMB */ + case 0x002b: + case 0x002c: + break; + case 0x0040: /* Service Action or Error Recovery Failed */ + case 0x003b: + zpci_event_io_failure(pdev, pci_channel_io_perm_failure); + break; + default: /* PCI function left in the error state attempt to recover */ ers_res = zpci_event_attempt_error_recovery(pdev); if (ers_res != PCI_ERS_RESULT_RECOVERED) zpci_event_io_failure(pdev, pci_channel_io_perm_failure); break; - default: - /* - * Mark as frozen not permanently failed because the device - * could be subsequently recovered by the platform. - */ - zpci_event_io_failure(pdev, pci_channel_io_frozen); - break; } pci_dev_put(pdev); no_pdev: From 3e212996d21f688f60bf8f6120a7816fcbf20104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 10 Oct 2024 15:27:07 +0200 Subject: [PATCH 020/207] bpf: fix kfunc btf caching for modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6cb86a0fdece87e126323ec1bb19deb16a52aedf ] The verifier contains a cache for looking up module BTF objects when calling kfuncs defined in modules. This cache uses a 'struct bpf_kfunc_btf_tab', which contains a sorted list of BTF objects that were already seen in the current verifier run, and the BTF objects are looked up by the offset stored in the relocated call instruction using bsearch(). The first time a given offset is seen, the module BTF is loaded from the file descriptor passed in by libbpf, and stored into the cache. However, there's a bug in the code storing the new entry: it stores a pointer to the new cache entry, then calls sort() to keep the cache sorted for the next lookup using bsearch(), and then returns the entry that was just stored through the stored pointer. However, because sort() modifies the list of entries in place *by value*, the stored pointer may no longer point to the right entry, in which case the wrong BTF object will be returned. The end result of this is an intermittent bug where, if a BPF program calls two functions with the same signature in two different modules, the function from the wrong module may sometimes end up being called. Whether this happens depends on the order of the calls in the BPF program (as that affects whether sort() reorders the array of BTF objects), making it especially hard to track down. Simon, credited as reporter below, spent significant effort analysing and creating a reproducer for this issue. The reproducer is added as a selftest in a subsequent patch. The fix is straight forward: simply don't use the stored pointer after calling sort(). Since we already have an on-stack pointer to the BTF object itself at the point where the function return, just use that, and populate it from the cache entry in the branch where the lookup succeeds. Fixes: 2357672c54c3 ("bpf: Introduce BPF support for kernel module function calls") Reported-by: Simon Sundberg Acked-by: Jiri Olsa Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20241010-fix-kfunc-btf-caching-for-modules-v2-1-745af6c1af98@redhat.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3032a464d31b..d1050479cbb3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2799,10 +2799,16 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, b->module = mod; b->offset = offset; + /* sort() reorders entries by value, so b may no longer point + * to the right entry after this + */ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), kfunc_btf_cmp_by_off, NULL); + } else { + btf = b->btf; } - return b->btf; + + return btf; } void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) From 1b3090ca3265cf3d53c63c5039ea877ee41cd6a2 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 7 Oct 2024 22:06:38 +0200 Subject: [PATCH 021/207] iio: frequency: {admv4420,adrf6780}: format Kconfig entries [ Upstream commit 5c9644a683e1690387a476a4f5f6bd5cf9a1d695 ] Format the entries of these drivers in the Kconfig, where spaces instead of tabs were used. Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20241007-ad2s1210-select-v2-1-7345d228040f@gmail.com Signed-off-by: Jonathan Cameron Stable-dep-of: 6b8e9dbfaed4 ("iio: frequency: admv4420: fix missing select REMAP_SPI in Kconfig") Signed-off-by: Sasha Levin --- drivers/iio/frequency/Kconfig | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/iio/frequency/Kconfig b/drivers/iio/frequency/Kconfig index 71de6cc4a158..f7534dd8a8ca 100644 --- a/drivers/iio/frequency/Kconfig +++ b/drivers/iio/frequency/Kconfig @@ -82,25 +82,25 @@ config ADMV1014 module will be called admv1014. config ADMV4420 - tristate "Analog Devices ADMV4420 K Band Downconverter" - depends on SPI - help - Say yes here to build support for Analog Devices K Band - Downconverter with integrated Fractional-N PLL and VCO. + tristate "Analog Devices ADMV4420 K Band Downconverter" + depends on SPI + help + Say yes here to build support for Analog Devices K Band + Downconverter with integrated Fractional-N PLL and VCO. - To compile this driver as a module, choose M here: the - module will be called admv4420. + To compile this driver as a module, choose M here: the + module will be called admv4420. config ADRF6780 - tristate "Analog Devices ADRF6780 Microwave Upconverter" - depends on SPI - depends on COMMON_CLK - help - Say yes here to build support for Analog Devices ADRF6780 - 5.9 GHz to 23.6 GHz, Wideband, Microwave Upconverter. + tristate "Analog Devices ADRF6780 Microwave Upconverter" + depends on SPI + depends on COMMON_CLK + help + Say yes here to build support for Analog Devices ADRF6780 + 5.9 GHz to 23.6 GHz, Wideband, Microwave Upconverter. - To compile this driver as a module, choose M here: the - module will be called adrf6780. + To compile this driver as a module, choose M here: the + module will be called adrf6780. endmenu endmenu From 119249d146d5529ed58b42a7adcc38896e8b6bf0 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 7 Oct 2024 22:06:39 +0200 Subject: [PATCH 022/207] iio: frequency: admv4420: fix missing select REMAP_SPI in Kconfig [ Upstream commit 6b8e9dbfaed471627f7b863633b9937717df1d4d ] This driver makes use of regmap_spi, but does not select the required module. Add the missing 'select REGMAP_SPI'. Fixes: b59c04155901 ("iio: frequency: admv4420.c: Add support for ADMV4420") Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20241007-ad2s1210-select-v2-2-7345d228040f@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/frequency/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/frequency/Kconfig b/drivers/iio/frequency/Kconfig index f7534dd8a8ca..036763d3e84c 100644 --- a/drivers/iio/frequency/Kconfig +++ b/drivers/iio/frequency/Kconfig @@ -84,6 +84,7 @@ config ADMV1014 config ADMV4420 tristate "Analog Devices ADMV4420 K Band Downconverter" depends on SPI + select REGMAP_SPI help Say yes here to build support for Analog Devices K Band Downconverter with integrated Fractional-N PLL and VCO. From 923c5ded505c28b07161573000c05ae9674a5de3 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Fri, 9 Aug 2024 13:37:56 -0500 Subject: [PATCH 023/207] drm/vmwgfx: Handle possible ENOMEM in vmw_stdu_connector_atomic_check [ Upstream commit 4809a017a2bc42ff239d53ade4b2e70f2fe81348 ] Handle unlikely ENOMEN condition and other errors in vmw_stdu_connector_atomic_check. Signed-off-by: Ian Forbes Reported-by: Dan Carpenter Fixes: 75c3e8a26a35 ("drm/vmwgfx: Trigger a modeset when the screen moves") Reviewed-by: Zack Rusin Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240809183756.27283-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index b22ae25db4e1..52ea0c50c852 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -881,6 +881,10 @@ static int vmw_stdu_connector_atomic_check(struct drm_connector *conn, struct drm_crtc_state *new_crtc_state; conn_state = drm_atomic_get_connector_state(state, conn); + + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); + du = vmw_connector_to_stdu(conn); if (!conn_state->crtc) From a3fe89d4f5bb12791e4407542385b39bde55fc39 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Tue, 8 Oct 2024 21:07:20 -0700 Subject: [PATCH 024/207] selftests/bpf: Fix cross-compiling urandom_read [ Upstream commit fd526e121c4d6f71aed82d21a8b8277b03e60b43 ] Linking of urandom_read and liburandom_read.so prefers LLVM's 'ld.lld' but falls back to using 'ld' if unsupported. However, this fallback discards any existing makefile macro for LD and can break cross-compilation. Fix by changing the fallback to use the target linker $(LD), passed via '-fuse-ld=' using an absolute path rather than a linker "flavour". Fixes: 08c79c9cd67f ("selftests/bpf: Don't force lld on non-x86 architectures") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241009040720.635260-1-tony.ambardar@gmail.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ab364e95a9b2..f5a3a84fac95 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -200,7 +200,7 @@ $(OUTPUT)/%:%.c ifeq ($(SRCARCH),x86) LLD := lld else -LLD := ld +LLD := $(shell command -v $(LD)) endif # Filter out -static for liburandom_read.so and its dependent targets so that static builds From 380681a29066c1f5402053f62862931f1cf6b305 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:36 +0200 Subject: [PATCH 025/207] task_work: Add TWA_NMI_CURRENT as an additional notify mode. [ Upstream commit 466e4d801cd438a1ab2c8a2cce1bef6b65c31bbb ] Adding task_work from NMI context requires the following: - The kasan_record_aux_stack() is not NMU safe and must be avoided. - Using TWA_RESUME is NMI safe. If the NMI occurs while the CPU is in userland then it will continue in userland and not invoke the `work' callback. Add TWA_NMI_CURRENT as an additional notify mode. In this mode skip kasan and use irq_work in hardirq-mode to for needed interrupt. Set TIF_NOTIFY_RESUME within the irq_work callback due to k[ac]san instrumentation in test_and_set_bit() which does not look NMI safe in case of a report. Suggested-by: Peter Zijlstra Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240704170424.1466941-3-bigeasy@linutronix.de Stable-dep-of: 73ab05aa46b0 ("sched/core: Disable page allocation in task_tick_mm_cid()") Signed-off-by: Sasha Levin --- include/linux/task_work.h | 1 + kernel/task_work.c | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 26b8a47f41fc..cf5e7e891a77 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -18,6 +18,7 @@ enum task_work_notify_mode { TWA_RESUME, TWA_SIGNAL, TWA_SIGNAL_NO_IPI, + TWA_NMI_CURRENT, }; static inline bool task_work_pending(struct task_struct *task) diff --git a/kernel/task_work.c b/kernel/task_work.c index 2134ac8057a9..5c2daa7ad3f9 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -1,10 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include static struct callback_head work_exited; /* all we need is ->next == NULL */ +static void task_work_set_notify_irq(struct irq_work *entry) +{ + test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); +} +static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = + IRQ_WORK_INIT_HARD(task_work_set_notify_irq); + /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback @@ -12,7 +20,7 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ * @notify: how to notify the targeted task * * Queue @work for task_work_run() below and notify the @task if @notify - * is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI. + * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. * * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted * task and run the task_work, regardless of whether the task is currently @@ -24,6 +32,8 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ * kernel anyway. * @TWA_RESUME work is run only when the task exits the kernel and returns to * user mode, or before entering guest mode. + * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the + * current @task and if the current context is NMI. * * Fails if the @task is exiting/exited and thus it can't process this @work. * Otherwise @work->func() will be called when the @task goes through one of @@ -44,8 +54,13 @@ int task_work_add(struct task_struct *task, struct callback_head *work, { struct callback_head *head; - /* record the work call stack in order to print it in KASAN reports */ - kasan_record_aux_stack(work); + if (notify == TWA_NMI_CURRENT) { + if (WARN_ON_ONCE(task != current)) + return -EINVAL; + } else { + /* record the work call stack in order to print it in KASAN reports */ + kasan_record_aux_stack(work); + } head = READ_ONCE(task->task_works); do { @@ -66,6 +81,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; + case TWA_NMI_CURRENT: + irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); + break; default: WARN_ON_ONCE(1); break; From 509c29d0d26f68a6f6d0a05cb1a89725237e2b87 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 9 Oct 2024 21:44:32 -0400 Subject: [PATCH 026/207] sched/core: Disable page allocation in task_tick_mm_cid() [ Upstream commit 73ab05aa46b02d96509cb029a8d04fca7bbde8c7 ] With KASAN and PREEMPT_RT enabled, calling task_work_add() in task_tick_mm_cid() may cause the following splat. [ 63.696416] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [ 63.696416] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 610, name: modprobe [ 63.696416] preempt_count: 10001, expected: 0 [ 63.696416] RCU nest depth: 1, expected: 1 This problem is caused by the following call trace. sched_tick() [ acquire rq->__lock ] -> task_tick_mm_cid() -> task_work_add() -> __kasan_record_aux_stack() -> kasan_save_stack() -> stack_depot_save_flags() -> alloc_pages_mpol_noprof() -> __alloc_pages_noprof() -> get_page_from_freelist() -> rmqueue() -> rmqueue_pcplist() -> __rmqueue_pcplist() -> rmqueue_bulk() -> rt_spin_lock() The rq lock is a raw_spinlock_t. We can't sleep while holding it. IOW, we can't call alloc_pages() in stack_depot_save_flags(). The task_tick_mm_cid() function with its task_work_add() call was introduced by commit 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") in v6.4 kernel. Fortunately, there is a kasan_record_aux_stack_noalloc() variant that calls stack_depot_save_flags() while not allowing it to allocate new pages. To allow task_tick_mm_cid() to use task_work without page allocation, a new TWAF_NO_ALLOC flag is added to enable calling kasan_record_aux_stack_noalloc() instead of kasan_record_aux_stack() if set. The task_tick_mm_cid() function is modified to add this new flag. The possible downside is the missing stack trace in a KASAN report due to new page allocation required when task_work_add_noallloc() is called which should be rare. Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241010014432.194742-1-longman@redhat.com Signed-off-by: Sasha Levin --- include/linux/task_work.h | 5 ++++- kernel/sched/core.c | 4 +++- kernel/task_work.c | 15 +++++++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/include/linux/task_work.h b/include/linux/task_work.h index cf5e7e891a77..2964171856e0 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func) } enum task_work_notify_mode { - TWA_NONE, + TWA_NONE = 0, TWA_RESUME, TWA_SIGNAL, TWA_SIGNAL_NO_IPI, TWA_NMI_CURRENT, + + TWA_FLAGS = 0xff00, + TWAF_NO_ALLOC = 0x0100, }; static inline bool task_work_pending(struct task_struct *task) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9b406d988654..b6f922a20f83 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -12050,7 +12050,9 @@ void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) return; if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan))) return; - task_work_add(curr, work, TWA_RESUME); + + /* No page allocation under rq lock */ + task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC); } void sched_mm_cid_exit_signals(struct task_struct *t) diff --git a/kernel/task_work.c b/kernel/task_work.c index 5c2daa7ad3f9..8aa43204cb7d 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -53,13 +53,24 @@ int task_work_add(struct task_struct *task, struct callback_head *work, enum task_work_notify_mode notify) { struct callback_head *head; + int flags = notify & TWA_FLAGS; + notify &= ~TWA_FLAGS; if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; } else { - /* record the work call stack in order to print it in KASAN reports */ - kasan_record_aux_stack(work); + /* + * Record the work call stack in order to print it in KASAN + * reports. + * + * Note that stack allocation can fail if TWAF_NO_ALLOC flag + * is set and new page is needed to expand the stack buffer. + */ + if (flags & TWAF_NO_ALLOC) + kasan_record_aux_stack_noalloc(work); + else + kasan_record_aux_stack(work); } head = READ_ONCE(task->task_works); From 8971fd61210d75fd2af225621cd2fcc87eb1847c Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 11 Oct 2024 01:16:45 +0300 Subject: [PATCH 027/207] ALSA: hda/cs8409: Fix possible NULL dereference [ Upstream commit c9bd4a82b4ed32c6d1c90500a52063e6e341517f ] If snd_hda_gen_add_kctl fails to allocate memory and returns NULL, then NULL pointer dereference will occur in the next line. Since dolphin_fixups function is a hda_fixup function which is not supposed to return any errors, add simple check before dereference, ignore the fail. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 20e507724113 ("ALSA: hda/cs8409: Add support for dolphin") Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20241010221649.1305-1-m.masimov@maxima.ru Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_cs8409.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index e41316e2e983..892223d9e64a 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1411,8 +1411,9 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac kctrl = snd_hda_gen_add_kctl(&spec->gen, "Line Out Playback Volume", &cs42l42_dac_volume_mixer); /* Update Line Out kcontrol template */ - kctrl->private_value = HDA_COMPOSE_AMP_VAL_OFS(DOLPHIN_HP_PIN_NID, 3, CS8409_CODEC1, - HDA_OUTPUT, CS42L42_VOL_DAC) | HDA_AMP_VAL_MIN_MUTE; + if (kctrl) + kctrl->private_value = HDA_COMPOSE_AMP_VAL_OFS(DOLPHIN_HP_PIN_NID, 3, CS8409_CODEC1, + HDA_OUTPUT, CS42L42_VOL_DAC) | HDA_AMP_VAL_MIN_MUTE; cs8409_enable_ur(codec, 0); snd_hda_codec_set_name(codec, "CS8409/CS42L42"); break; From 6d91d07913aee90556362d648d6a28a1eda419dc Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 11 Oct 2024 18:40:02 +0800 Subject: [PATCH 028/207] firmware: arm_scmi: Fix the double free in scmi_debugfs_common_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 39b13dce1a91cdfc3bec9238f9e89094551bd428 ] Clang static checker(scan-build) throws below warning: | drivers/firmware/arm_scmi/driver.c:line 2915, column 2 | Attempt to free released memory. When devm_add_action_or_reset() fails, scmi_debugfs_common_cleanup() will run twice which causes double free of 'dbg->name'. Remove the redundant scmi_debugfs_common_cleanup() to fix this problem. Fixes: c3d4aed763ce ("firmware: arm_scmi: Populate a common SCMI debugfs root") Signed-off-by: Su Hui Reviewed-by: Cristian Marussi Message-Id: <20241011104001.1546476-1-suhui@nfschina.com> Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/driver.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 87383c05424b..3962683e2af9 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2603,10 +2603,8 @@ static struct scmi_debug_info *scmi_debugfs_common_setup(struct scmi_info *info) dbg->top_dentry = top_dentry; if (devm_add_action_or_reset(info->dev, - scmi_debugfs_common_cleanup, dbg)) { - scmi_debugfs_common_cleanup(dbg); + scmi_debugfs_common_cleanup, dbg)) return NULL; - } return dbg; } From c29fd05329a9661d63addc3df4c38fbfc6ad2fb5 Mon Sep 17 00:00:00 2001 From: Anumula Murali Mohan Reddy Date: Mon, 7 Oct 2024 18:53:11 +0530 Subject: [PATCH 029/207] RDMA/cxgb4: Fix RDMA_CM_EVENT_UNREACHABLE error for iWARP [ Upstream commit c659b405b82ead335bee6eb33f9691bf718e21e8 ] ip_dev_find() always returns real net_device address, whether traffic is running on a vlan or real device, if traffic is over vlan, filling endpoint struture with real ndev and an attempt to send a connect request will results in RDMA_CM_EVENT_UNREACHABLE error. This patch fixes the issue by using vlan_dev_real_dev(). Fixes: 830662f6f032 ("RDMA/cxgb4: Add support for active and passive open connection with IPv6 address") Link: https://patch.msgid.link/r/20241007132311.70593-1-anumula@chelsio.com Signed-off-by: Anumula Murali Mohan Reddy Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b3757c6a0457..8d753e6e0c71 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2086,7 +2086,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, err = -ENOMEM; if (n->dev->flags & IFF_LOOPBACK) { if (iptype == 4) - pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); + pdev = __ip_dev_find(&init_net, *(__be32 *)peer_ip, false); else if (IS_ENABLED(CONFIG_IPV6)) for_each_netdev(&init_net, pdev) { if (ipv6_chk_addr(&init_net, @@ -2101,12 +2101,12 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, err = -ENODEV; goto out; } + if (is_vlan_dev(pdev)) + pdev = vlan_dev_real_dev(pdev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) { - dev_put(pdev); + if (!ep->l2t) goto out; - } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; @@ -2119,7 +2119,6 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, ep->rss_qid = cdev->rdev.lldi.rxq_ids[ cxgb4_port_idx(pdev) * step]; set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); - dev_put(pdev); } else { pdev = get_real_dev(n->dev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, From af29c430a34381bd3c36fff34450e0565a03d1f4 Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Tue, 8 Oct 2024 18:19:13 +0200 Subject: [PATCH 030/207] RDMA/irdma: Fix misspelling of "accept*" [ Upstream commit 8cddfa535c931b8d8110c73bfed7354a94cbf891 ] There is "accept*" misspelled as "accpet*" in the comments. Fix the spelling. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Link: https://patch.msgid.link/r/20241008161913.19965-1-green@qrator.net Signed-off-by: Alexander Zubkov Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 42d1e9771066..1916daa8c332 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3630,7 +3630,7 @@ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp) /** * irdma_accept - registered call for connection to be accepted * @cm_id: cm information for passive connection - * @conn_param: accpet parameters + * @conn_param: accept parameters */ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { From 3a662cc5b5d5fee6faad071b095b2bf50035b827 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Oct 2024 14:00:48 -0700 Subject: [PATCH 031/207] RDMA/srpt: Make slab cache names unique [ Upstream commit 4d784c042d164f10fc809e2338457036cd7c653d ] Since commit 4c39529663b9 ("slab: Warn on duplicate cache names when DEBUG_VM=y"), slab complains about duplicate cache names. Hence this patch. The approach is as follows: - Maintain an xarray with the slab size as index and a reference count and a kmem_cache pointer as contents. Use srpt-${slab_size} as kmem cache name. - Use 512-byte alignment for all slabs instead of only for some of the slabs. - Increment the reference count instead of calling kmem_cache_create(). - Decrement the reference count instead of calling kmem_cache_destroy(). Fixes: 5dabcd0456d7 ("RDMA/srpt: Add support for immediate data") Link: https://patch.msgid.link/r/20241009210048.4122518-1-bvanassche@acm.org Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/xpe6bea7rakpyoyfvspvin2dsozjmjtjktpph7rep3h25tv7fb@ooz4cu5z6bq6/ Suggested-by: Jason Gunthorpe Signed-off-by: Bart Van Assche Tested-by: Shin'ichiro Kawasaki Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 80 +++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 45547bf281e3..4bebc34a2929 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -68,6 +68,8 @@ MODULE_LICENSE("Dual BSD/GPL"); static u64 srpt_service_guid; static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */ static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. */ +static DEFINE_MUTEX(srpt_mc_mutex); /* Protects srpt_memory_caches. */ +static DEFINE_XARRAY(srpt_memory_caches); /* See also srpt_memory_cache_entry */ static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; module_param(srp_max_req_size, int, 0444); @@ -105,6 +107,63 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_process_wait_list(struct srpt_rdma_ch *ch); +/* Type of the entries in srpt_memory_caches. */ +struct srpt_memory_cache_entry { + refcount_t ref; + struct kmem_cache *c; +}; + +static struct kmem_cache *srpt_cache_get(unsigned int object_size) +{ + struct srpt_memory_cache_entry *e; + char name[32]; + void *res; + + guard(mutex)(&srpt_mc_mutex); + e = xa_load(&srpt_memory_caches, object_size); + if (e) { + refcount_inc(&e->ref); + return e->c; + } + snprintf(name, sizeof(name), "srpt-%u", object_size); + e = kmalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return NULL; + refcount_set(&e->ref, 1); + e->c = kmem_cache_create(name, object_size, /*align=*/512, 0, NULL); + if (!e->c) + goto free_entry; + res = xa_store(&srpt_memory_caches, object_size, e, GFP_KERNEL); + if (xa_is_err(res)) + goto destroy_cache; + return e->c; + +destroy_cache: + kmem_cache_destroy(e->c); + +free_entry: + kfree(e); + return NULL; +} + +static void srpt_cache_put(struct kmem_cache *c) +{ + struct srpt_memory_cache_entry *e = NULL; + unsigned long object_size; + + guard(mutex)(&srpt_mc_mutex); + xa_for_each(&srpt_memory_caches, object_size, e) + if (e->c == c) + break; + if (WARN_ON_ONCE(!e)) + return; + if (!refcount_dec_and_test(&e->ref)) + return; + WARN_ON_ONCE(xa_erase(&srpt_memory_caches, object_size) != e); + kmem_cache_destroy(e->c); + kfree(e); +} + /* * The only allowed channel state changes are those that change the channel * state into a state with a higher numerical value. Hence the new > prev test. @@ -2119,13 +2178,13 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_buf_cache, DMA_TO_DEVICE); - kmem_cache_destroy(ch->rsp_buf_cache); + srpt_cache_put(ch->rsp_buf_cache); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, sdev, ch->rq_size, ch->req_buf_cache, DMA_FROM_DEVICE); - kmem_cache_destroy(ch->req_buf_cache); + srpt_cache_put(ch->req_buf_cache); kref_put(&ch->kref, srpt_free_ch); } @@ -2245,8 +2304,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, INIT_LIST_HEAD(&ch->cmd_wait_list); ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size; - ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size, - 512, 0, NULL); + ch->rsp_buf_cache = srpt_cache_get(ch->max_rsp_size); if (!ch->rsp_buf_cache) goto free_ch; @@ -2280,8 +2338,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, alignment_offset = round_up(imm_data_offset, 512) - imm_data_offset; req_sz = alignment_offset + imm_data_offset + srp_max_req_size; - ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz, - 512, 0, NULL); + ch->req_buf_cache = srpt_cache_get(req_sz); if (!ch->req_buf_cache) goto free_rsp_ring; @@ -2478,7 +2535,7 @@ free_recv_ring: ch->req_buf_cache, DMA_FROM_DEVICE); free_recv_cache: - kmem_cache_destroy(ch->req_buf_cache); + srpt_cache_put(ch->req_buf_cache); free_rsp_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, @@ -2486,7 +2543,7 @@ free_rsp_ring: ch->rsp_buf_cache, DMA_TO_DEVICE); free_rsp_cache: - kmem_cache_destroy(ch->rsp_buf_cache); + srpt_cache_put(ch->rsp_buf_cache); free_ch: if (rdma_cm_id) @@ -3055,7 +3112,7 @@ static void srpt_free_srq(struct srpt_device *sdev) srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, sdev->srq_size, sdev->req_buf_cache, DMA_FROM_DEVICE); - kmem_cache_destroy(sdev->req_buf_cache); + srpt_cache_put(sdev->req_buf_cache); sdev->srq = NULL; } @@ -3082,8 +3139,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); - sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf", - srp_max_req_size, 0, 0, NULL); + sdev->req_buf_cache = srpt_cache_get(srp_max_req_size); if (!sdev->req_buf_cache) goto free_srq; @@ -3105,7 +3161,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev) return 0; free_cache: - kmem_cache_destroy(sdev->req_buf_cache); + srpt_cache_put(sdev->req_buf_cache); free_srq: ib_destroy_srq(srq); From 1ee5ef316a77d81e6d4d2e38b77a1805f330a2c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 9 Oct 2024 14:47:13 -0400 Subject: [PATCH 032/207] ipv4: give an IPv4 dev to blackhole_netdev [ Upstream commit 22600596b6756b166fd052d5facb66287e6f0bad ] After commit 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries"), blackhole_netdev was introduced to invalidate dst cache entries on the TX path whenever the cache times out or is flushed. When two UDP sockets (sk1 and sk2) send messages to the same destination simultaneously, they are using the same dst cache. If the dst cache is invalidated on one path (sk2) while the other (sk1) is still transmitting, sk1 may try to use the invalid dst entry. CPU1 CPU2 udp_sendmsg(sk1) udp_sendmsg(sk2) udp_send_skb() ip_output() <--- dst timeout or flushed dst_dev_put() ip_finish_output2() ip_neigh_for_gw() This results in a scenario where ip_neigh_for_gw() returns -EINVAL because blackhole_dev lacks an in_dev, which is needed to initialize the neigh in arp_constructor(). This error is then propagated back to userspace, breaking the UDP application. The patch fixes this issue by assigning an in_dev to blackhole_dev for IPv4, similar to what was done for IPv6 in commit e5f80fcf869a ("ipv6: give an IPv6 dev to blackhole_netdev"). This ensures that even when the dst entry is invalidated with blackhole_dev, it will not fail to create the neigh entry. As devinet_init() is called ealier than blackhole_netdev_init() in system booting, it can not assign the in_dev to blackhole_dev in devinet_init(). As Paolo suggested, add a separate late_initcall() in devinet.c to ensure inet_blackhole_dev_init() is called after blackhole_netdev_init(). Fixes: 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries") Signed-off-by: Xin Long Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/3000792d45ca44e16c785ebe2b092e610e5b3df1.1728499633.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/devinet.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cb0c80328eeb..4822f68edbf0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -283,17 +283,19 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); - err = devinet_sysctl_register(in_dev); - if (err) { - in_dev->dead = 1; - neigh_parms_release(&arp_tbl, in_dev->arp_parms); - in_dev_put(in_dev); - in_dev = NULL; - goto out; + if (dev != blackhole_netdev) { + err = devinet_sysctl_register(in_dev); + if (err) { + in_dev->dead = 1; + neigh_parms_release(&arp_tbl, in_dev->arp_parms); + in_dev_put(in_dev); + in_dev = NULL; + goto out; + } + ip_mc_init_dev(in_dev); + if (dev->flags & IFF_UP) + ip_mc_up(in_dev); } - ip_mc_init_dev(in_dev); - if (dev->flags & IFF_UP) - ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); @@ -332,6 +334,19 @@ static void inetdev_destroy(struct in_device *in_dev) in_dev_put(in_dev); } +static int __init inet_blackhole_dev_init(void) +{ + int err = 0; + + rtnl_lock(); + if (!inetdev_init(blackhole_netdev)) + err = -ENOMEM; + rtnl_unlock(); + + return err; +} +late_initcall(inet_blackhole_dev_init); + int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { const struct in_ifaddr *ifa; From 5fcf649e4334f4554f60b70d71e51d3ab02f5fe7 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:35 -0800 Subject: [PATCH 033/207] RDMA/bnxt_re: Support new 5760X P7 devices [ Upstream commit 1801d87b3598b173bce3fbf15c5517796f38db96 ] Add basic support for 5760X P7 devices. Add new chip revisions. The first version support is similar to the existing P5 adapters. Extend the current support for P5 adapters to P7 also. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Stable-dep-of: ac6df53738b4 ("RDMA/bnxt_re: Fix the max CQ WQEs for older adapters") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 4 ++-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 +++++----- drivers/infiniband/hw/bnxt_re/main.c | 14 +++++++------- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_res.h | 20 +++++++++++++++++--- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 6 +++--- 8 files changed, 38 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 93572405d6fa..128651c01595 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -371,7 +371,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, } done: - return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS; } @@ -381,7 +381,7 @@ struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); int num_counters = 0; - if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) num_counters = BNXT_RE_NUM_EXT_COUNTERS; else num_counters = BNXT_RE_NUM_STD_COUNTERS; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b4d3e7dfc939..f2eaecef7570 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1023,7 +1023,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? @@ -1234,7 +1234,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; if (qplqp->rq.max_sge > dev_attr->max_qp_sges) qplqp->rq.max_sge = dev_attr->max_qp_sges; @@ -1301,7 +1301,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); @@ -1328,7 +1328,7 @@ static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, goto out; } - if (bnxt_qplib_is_chip_gen_p5(chip_ctx) && + if (bnxt_qplib_is_chip_gen_p5_p7(chip_ctx) && init_attr->qp_type == IB_QPT_GSI) qptype = CMDQ_CREATE_QP_TYPE_GSI; out: @@ -1527,7 +1527,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, goto fail; if (qp_init_attr->qp_type == IB_QPT_GSI && - !(bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))) { + !(bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))) { rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr); if (rc == -ENODEV) goto qp_destroy; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c173d0ffc629..594cc6aa7b79 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -128,7 +128,7 @@ static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) struct bnxt_qplib_chip_ctx *cctx; cctx = rdev->chip_ctx; - cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? mode : BNXT_QPLIB_WQE_MODE_STATIC; if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), @@ -218,7 +218,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, attr->max_srq); ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) rdev->qplib_ctx.tqm_ctx.qcount[i] = rdev->dev_attr.tqm_alloc_reqs[i]; @@ -267,7 +267,7 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); bnxt_re_limit_pf_res(rdev); - num_vfs = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; if (num_vfs) bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); @@ -279,7 +279,7 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return; rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { bnxt_re_set_resource_limits(rdev); bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, &rdev->qplib_ctx); @@ -1074,7 +1074,7 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, #define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { - return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : BNXT_RE_GEN_P5_PF_NQ_DB) : rdev->en_dev->msix_entries[indx].db_offset; @@ -1539,7 +1539,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, - bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)); + bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate QPLIB context: %#x\n", rc); @@ -1662,7 +1662,7 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) return; /* Currently enabling only for GenP5 adapters */ - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) return; if (enable) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 3b28878f6206..4ee11cb4f2bd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -995,7 +995,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ if (qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); @@ -1649,7 +1649,7 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & SQ_PSN_SEARCH_NEXT_PSN_MASK); - if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { + if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5680fe8b890a..3ffaef0c2651 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -852,7 +852,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, */ if (is_virtfn) goto skip_ctx_setup; - if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) goto config_vf_res; lvl = ctx->qpc_tbl.level; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 47406ab8879c..1fdffd6a0f48 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -807,7 +807,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, dpit = &res->dpi_tbl; reg = &dpit->wcreg; - if (!bnxt_qplib_is_chip_gen_p5(res->cctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { /* Offest should come from L2 driver */ dbr_offset = dev_attr->l2_db_size; dpit->ucreg.offset = dbr_offset; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 534db462216a..f9e7aa3757cf 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -44,6 +44,9 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; #define CHIP_NUM_57508 0x1750 #define CHIP_NUM_57504 0x1751 #define CHIP_NUM_57502 0x1752 +#define CHIP_NUM_58818 0xd818 +#define CHIP_NUM_57608 0x1760 + struct bnxt_qplib_drv_modes { u8 wqe_mode; @@ -296,6 +299,12 @@ struct bnxt_qplib_res { struct bnxt_qplib_db_pacing_data *pacing_data; }; +static inline bool bnxt_qplib_is_chip_gen_p7(struct bnxt_qplib_chip_ctx *cctx) +{ + return (cctx->chip_num == CHIP_NUM_58818 || + cctx->chip_num == CHIP_NUM_57608); +} + static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { return (cctx->chip_num == CHIP_NUM_57508 || @@ -303,15 +312,20 @@ static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) cctx->chip_num == CHIP_NUM_57502); } +static inline bool bnxt_qplib_is_chip_gen_p5_p7(struct bnxt_qplib_chip_ctx *cctx) +{ + return bnxt_qplib_is_chip_gen_p5(cctx) || bnxt_qplib_is_chip_gen_p7(cctx); +} + static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) { - return bnxt_qplib_is_chip_gen_p5(res->cctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL; } static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) { - return bnxt_qplib_is_chip_gen_p5(cctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(cctx) ? RING_ALLOC_REQ_RING_TYPE_NQ : RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } @@ -488,7 +502,7 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info, u32 type; type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; - if (bnxt_qplib_is_chip_gen_p5(cctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(cctx)) bnxt_qplib_ring_db(info, type); else bnxt_qplib_ring_db32(info, arm); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index a27b68515164..c580bf78d4c1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -59,7 +59,7 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) { u16 pcie_ctl2 = 0; - if (!bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) return false; pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2); @@ -133,7 +133,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * reporting the max number */ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; - attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? + attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); @@ -934,7 +934,7 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, req->inactivity_th = cpu_to_le16(cc_param->inact_th); /* For chip gen P5 onwards fill extended cmd and header */ - if (bnxt_qplib_is_chip_gen_p5(res->cctx)) { + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { struct roce_tlv *hdr; u32 payload; u32 chunks; From 903f8b01c5875d523294941d7507f75a36e6533f Mon Sep 17 00:00:00 2001 From: Abhishek Mohapatra Date: Tue, 8 Oct 2024 00:41:33 -0700 Subject: [PATCH 034/207] RDMA/bnxt_re: Fix the max CQ WQEs for older adapters [ Upstream commit ac6df53738b465053d38d491fff87bd7d37fdc07 ] Older adapters doesn't support the MAX CQ WQEs reported by older FW. So restrict the value reported to 1M always for older adapters. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://patch.msgid.link/r/1728373302-19530-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Abhishek Mohapatra Reviewed-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2 ++ drivers/infiniband/hw/bnxt_re/qplib_sp.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index c580bf78d4c1..2b73bb433b88 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -137,6 +137,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); + if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) + attr->max_cq_wqes = min_t(u32, BNXT_QPLIB_MAX_CQ_WQES, attr->max_cq_wqes); attr->max_cq_sges = attr->max_qp_sges; attr->max_mr = le32_to_cpu(sb->max_mr); attr->max_mw = le32_to_cpu(sb->max_mw); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index d33c78b96217..755765e68eaa 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -55,6 +55,7 @@ struct bnxt_qplib_dev_attr { u32 max_qp_wqes; u32 max_qp_sges; u32 max_cq; +#define BNXT_QPLIB_MAX_CQ_WQES 0xfffff u32 max_cq_wqes; u32 max_cq_sges; u32 max_mr; From 05c5fcc1869a08e36a29691699b6534e5a00a82b Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 8 Oct 2024 00:41:34 -0700 Subject: [PATCH 035/207] RDMA/bnxt_re: Fix out of bound check [ Upstream commit a9e6e7443922ac0a48243c35d03834c96926bff1 ] Driver exports pacing stats only on GenP5 and P7 adapters. But while parsing the pacing stats, driver has a check for "rdev->dbr_pacing". This caused a trace when KASAN is enabled. BUG: KASAN: slab-out-of-bounds in bnxt_re_get_hw_stats+0x2b6a/0x2e00 [bnxt_re] Write of size 8 at addr ffff8885942a6340 by task modprobe/4809 Fixes: 8b6573ff3420 ("bnxt_re: Update the debug counters for doorbell pacing") Link: https://patch.msgid.link/r/1728373302-19530-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/hw_counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 128651c01595..1e63f8091748 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -366,7 +366,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } } - if (rdev->pacing.dbr_pacing) + if (rdev->pacing.dbr_pacing && bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) bnxt_re_copy_db_pacing_stats(rdev, stats); } From d517cadae6c321b835b9ebc976f9aca4f64bf0cc Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 8 Oct 2024 00:41:36 -0700 Subject: [PATCH 036/207] RDMA/bnxt_re: Return more meaningful error [ Upstream commit 98647df0178df215b8239c5c365537283b2852a6 ] When the HWRM command fails, driver currently returns -EFAULT(Bad address). This does not look correct. Modified to return -EIO(I/O error). Fixes: cc1ec769b87c ("RDMA/bnxt_re: Fixing the Control path command and response handling") Fixes: 65288a22ddd8 ("RDMA/bnxt_re: use shadow qd while posting non blocking rcfw command") Link: https://patch.msgid.link/r/1728373302-19530-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 3ffaef0c2651..7294221b3316 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -525,7 +525,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /* failed with status */ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n", cookie, opcode, evnt->status); - rc = -EFAULT; + rc = -EIO; } return rc; From 87cb3b0054e53e0155b630bdf8fb714ded62565f Mon Sep 17 00:00:00 2001 From: Bhargava Chenna Marreddy Date: Tue, 8 Oct 2024 00:41:41 -0700 Subject: [PATCH 037/207] RDMA/bnxt_re: Fix a bug while setting up Level-2 PBL pages [ Upstream commit 7988bdbbb85ac85a847baf09879edcd0f70521dc ] Avoid memory corruption while setting up Level-2 PBL pages for the non MR resources when num_pages > 256K. There will be a single PDE page address (contiguous pages in the case of > PAGE_SIZE), but, current logic assumes multiple pages, leading to invalid memory access after 256K PBL entries in the PDE. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Link: https://patch.msgid.link/r/1728373302-19530-10-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Bhargava Chenna Marreddy Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 1fdffd6a0f48..96ceec1e8199 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -257,22 +257,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr; - if (hwq_attr->type == HWQ_TYPE_MR) { - /* For MR it is expected that we supply only 1 contigous - * page i.e only 1 entry in the PDL that will contain - * all the PBLs for the user supplied memory region - */ - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[0][i] = src_phys_ptr[i] | - flag; - } else { - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = - src_phys_ptr[i] | - PTU_PDE_VALID; - } + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) + dst_virt_ptr[0][i] = src_phys_ptr[i] | flag; + /* Alloc or init PTEs */ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2], hwq_attr->sginfo); From bf39b3532143fd18e05ea20d83649071a7151946 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 7 Dec 2023 02:47:36 -0800 Subject: [PATCH 038/207] RDMA/bnxt_re: Update the BAR offsets [ Upstream commit a62d685814416647fbb28b3eb2617744adef2d4f ] Update the BAR offsets for handling GenP7 adapters. Use the values populated by L2 driver for getting the Doorbell offsets. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1701946060-13931-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Stable-dep-of: dc5006cfcf62 ("RDMA/bnxt_re: Fix the GID table length") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 21 +++++++-------------- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 5 +++-- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 594cc6aa7b79..607293794b92 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -107,8 +107,11 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) dev_info(rdev_to_dev(rdev), "Couldn't get DB bar size, Low latency framework is disabled\n"); /* set register offsets for both UC and WC */ - res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : - BNXT_QPLIB_DBR_PF_DB_OFFSET; + if (bnxt_qplib_is_chip_gen_p7(cctx)) + res->dpi_tbl.ucreg.offset = offset; + else + res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : + BNXT_QPLIB_DBR_PF_DB_OFFSET; res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size @@ -1070,16 +1073,6 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } -#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 -#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 -static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) -{ - return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? - (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : - BNXT_RE_GEN_P5_PF_NQ_DB) : - rdev->en_dev->msix_entries[indx].db_offset; -} - static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; @@ -1100,7 +1093,7 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) bnxt_qplib_init_res(&rdev->qplib_res); for (i = 1; i < rdev->num_msix ; i++) { - db_offt = bnxt_re_get_nqdb_offset(rdev, i); + db_offt = rdev->en_dev->msix_entries[i].db_offset; rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], i - 1, rdev->en_dev->msix_entries[i].vector, db_offt, &bnxt_re_cqn_handler, @@ -1511,7 +1504,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); + db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vid, db_offt, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 2b73bb433b88..7e550432ccb1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -153,8 +153,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_srq_sges = sb->max_srq_sge; attr->max_pkey = 1; attr->max_inline_data = le32_to_cpu(sb->max_inline_data); - attr->l2_db_size = (sb->l2_db_space_size + 1) * - (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); + if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) + attr->l2_db_size = (sb->l2_db_space_size + 1) * + (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); From 5032bf57ab5db42aee83125e09753f232b6375ca Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 8 Oct 2024 00:41:42 -0700 Subject: [PATCH 039/207] RDMA/bnxt_re: Fix the GID table length [ Upstream commit dc5006cfcf62bea88076a587344ba5e00e66d1c6 ] GID table length is reported by FW. The gid index which is passed to the driver during modify_qp/create_ah is restricted by the sgid_index field of struct ib_global_route. sgid_index is u8 and the max sgid possible is 256. Each GID entry in HW will have 2 GID entries in the kernel gid table. So we can support twice the gid table size reported by FW. Also, restrict the max GID to 256 also. Fixes: 847b97887ed4 ("RDMA/bnxt_re: Restrict the max_gids to 256") Link: https://patch.msgid.link/r/1728373302-19530-11-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 7e550432ccb1..0b98577cd708 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -156,7 +156,14 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); - attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; + /* + * Read the max gid supported by HW. + * For each entry in HW GID in HW table, we consume 2 + * GID entries in the kernel GID table. So max_gid reported + * to stack can be up to twice the value reported by the HW, up to 256 gids. + */ + attr->max_sgid = le32_to_cpu(sb->max_gid); + attr->max_sgid = min_t(u32, BNXT_QPLIB_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid); attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); bnxt_qplib_query_version(rcfw, attr->fw_ver); From 1931dc14b537b5f754f4c9776a0259c5def0126c Mon Sep 17 00:00:00 2001 From: Pranjal Ramajor Asha Kanojiya Date: Fri, 4 Oct 2024 13:32:52 -0600 Subject: [PATCH 040/207] accel/qaic: Fix the for loop used to walk SG table [ Upstream commit c5e8e93897b7bb0a336bf3332f82f8d9f2b33f14 ] Only for_each_sgtable_dma_sg() should be used to walk through a SG table to grab correct bus address and length pair after calling DMA MAP API on a SG table as DMA MAP APIs updates the SG table and for_each_sgtable_sg() walks through the original SG table. Fixes: ff13be830333 ("accel/qaic: Add datapath") Fixes: 129776ac2e38 ("accel/qaic: Add control path") Signed-off-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Jeffrey Hugo Signed-off-by: Jeffrey Hugo Reviewed-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20241004193252.3888544-1-quic_jhugo@quicinc.com Signed-off-by: Sasha Levin --- drivers/accel/qaic/qaic_control.c | 2 +- drivers/accel/qaic/qaic_data.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index 388abd40024b..f3db3fa91dd5 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -496,7 +496,7 @@ static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wr nents = sgt->nents; nents_dma = nents; *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans); - for_each_sgtable_sg(sgt, sg, i) { + for_each_sgtable_dma_sg(sgt, sg, i) { *size -= sizeof(*asp); /* Save 1K for possible follow-up transactions. */ if (*size < SZ_1K) { diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index ed1a5af434f2..d2f8c70a77a5 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -177,7 +177,7 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl nents = 0; size = size ? size : PAGE_SIZE; - for (sg = sgt_in->sgl; sg; sg = sg_next(sg)) { + for_each_sgtable_dma_sg(sgt_in, sg, j) { len = sg_dma_len(sg); if (!len) @@ -214,7 +214,7 @@ static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_tabl /* copy relevant sg node and fix page and length */ sgn = sgf; - for_each_sgtable_sg(sgt, sg, j) { + for_each_sgtable_dma_sg(sgt, sg, j) { memcpy(sg, sgn, sizeof(*sg)); if (sgn == sgf) { sg_dma_address(sg) += offf; @@ -294,7 +294,7 @@ static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice, * fence. */ dev_addr = req->dev_addr; - for_each_sgtable_sg(slice->sgt, sg, i) { + for_each_sgtable_dma_sg(slice->sgt, sg, i) { slice->reqs[i].cmd = cmd; slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ? sg_dma_address(sg) : dev_addr); From adde6c68bf1abec1198a9fa3e1407d3214654081 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 3 Sep 2024 06:22:44 +0300 Subject: [PATCH 041/207] drm/msm/dpu: make sure phys resources are properly initialized [ Upstream commit bfecbc2cfba9b06d67d9d249c33d92e570e2fa70 ] The commit b954fa6baaca ("drm/msm/dpu: Refactor rm iterator") removed zero-init of the hw_ctl array, but didn't change the error condition, that checked for hw_ctl[i] being NULL. At the same time because of the early returns in case of an error dpu_encoder_phys might be left with the resources assigned in the previous state. Rework assigning of hw_pp / hw_ctl to the dpu_encoder_phys in order to make sure they are always set correctly. Fixes: b954fa6baaca ("drm/msm/dpu: Refactor rm iterator") Suggested-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/612233/ Link: https://lore.kernel.org/r/20240903-dpu-mode-config-width-v6-1-617e1ecc4b7a@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 6262ec5e4020..10c68de1bf22 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1122,21 +1122,20 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - if (!dpu_enc->hw_pp[i]) { + phys->hw_pp = dpu_enc->hw_pp[i]; + if (!phys->hw_pp) { DPU_ERROR_ENC(dpu_enc, "no pp block assigned at idx: %d\n", i); return; } - if (!hw_ctl[i]) { + phys->hw_ctl = i < num_ctl ? to_dpu_hw_ctl(hw_ctl[i]) : NULL; + if (!phys->hw_ctl) { DPU_ERROR_ENC(dpu_enc, "no ctl block assigned at idx: %d\n", i); return; } - phys->hw_pp = dpu_enc->hw_pp[i]; - phys->hw_ctl = to_dpu_hw_ctl(hw_ctl[i]); - phys->cached_mode = crtc_state->adjusted_mode; if (phys->ops.atomic_mode_set) phys->ops.atomic_mode_set(phys, crtc_state, conn_state); From dd58a5f8b308f703da491688cf4cfa5f9900b938 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 3 Sep 2024 06:22:46 +0300 Subject: [PATCH 042/207] drm/msm/dpu: check for overflow in _dpu_crtc_setup_lm_bounds() [ Upstream commit 3a0851b442d1f63ba42ecfa2506d3176cfabf9d4 ] Make _dpu_crtc_setup_lm_bounds() check that CRTC width is not overflowing LM requirements. Rename the function accordingly. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Reviewed-by: Abhinav Kumar Tested-by: Abhinav Kumar # sc7280 Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/612237/ Link: https://lore.kernel.org/r/20240903-dpu-mode-config-width-v6-3-617e1ecc4b7a@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index e238e4e8116c..ad57368dc13f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -722,12 +722,13 @@ void dpu_crtc_complete_commit(struct drm_crtc *crtc) _dpu_crtc_complete_flip(crtc); } -static void _dpu_crtc_setup_lm_bounds(struct drm_crtc *crtc, +static int _dpu_crtc_check_and_setup_lm_bounds(struct drm_crtc *crtc, struct drm_crtc_state *state) { struct dpu_crtc_state *cstate = to_dpu_crtc_state(state); struct drm_display_mode *adj_mode = &state->adjusted_mode; u32 crtc_split_width = adj_mode->hdisplay / cstate->num_mixers; + struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); int i; for (i = 0; i < cstate->num_mixers; i++) { @@ -738,7 +739,12 @@ static void _dpu_crtc_setup_lm_bounds(struct drm_crtc *crtc, r->y2 = adj_mode->vdisplay; trace_dpu_crtc_setup_lm_bounds(DRMID(crtc), i, r); + + if (drm_rect_width(r) > dpu_kms->catalog->caps->max_mixer_width) + return -E2BIG; } + + return 0; } static void _dpu_crtc_get_pcc_coeff(struct drm_crtc_state *state, @@ -814,7 +820,7 @@ static void dpu_crtc_atomic_begin(struct drm_crtc *crtc, DRM_DEBUG_ATOMIC("crtc%d\n", crtc->base.id); - _dpu_crtc_setup_lm_bounds(crtc, crtc->state); + _dpu_crtc_check_and_setup_lm_bounds(crtc, crtc->state); /* encoder will trigger pending mask now */ drm_for_each_encoder_mask(encoder, crtc->dev, crtc->state->encoder_mask) @@ -1208,8 +1214,11 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, if (crtc_state->active_changed) crtc_state->mode_changed = true; - if (cstate->num_mixers) - _dpu_crtc_setup_lm_bounds(crtc, crtc_state); + if (cstate->num_mixers) { + rc = _dpu_crtc_check_and_setup_lm_bounds(crtc, crtc_state); + if (rc) + return rc; + } /* FIXME: move this to dpu_plane_atomic_check? */ drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { From 9f2361089152467f4819828f7826f1a9aebe1e9a Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 7 Oct 2024 01:01:48 -0400 Subject: [PATCH 043/207] drm/msm/dsi: improve/fix dsc pclk calculation [ Upstream commit 24436a540d16ca6a523b8e5441180001c31b6b35 ] drm_mode_vrefresh() can introduce a large rounding error, avoid it. Fixes: 7c9e4a554d4a ("drm/msm/dsi: Reduce pclk rate for compression") Signed-off-by: Jonathan Marek Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/618432/ Link: https://lore.kernel.org/r/20241007050157.26855-1-jonathan@marek.ca Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 77b805eacb1b..fcfe612416b0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -537,7 +537,7 @@ static unsigned long dsi_adjust_pclk_for_compression(const struct drm_display_mo int new_htotal = mode->htotal - mode->hdisplay + new_hdisplay; - return new_htotal * mode->vtotal * drm_mode_vrefresh(mode); + return mult_frac(mode->clock * 1000u, new_htotal, mode->htotal); } static unsigned long dsi_get_pclk_rate(const struct drm_display_mode *mode, From 33ece6c4f0c1e1133f54cc53b73fbc27965a585e Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 7 Oct 2024 01:01:49 -0400 Subject: [PATCH 044/207] drm/msm/dsi: fix 32-bit signed integer extension in pclk_rate calculation [ Upstream commit 358b762400bd94db2a14a72dfcef74c7da6bd845 ] When (mode->clock * 1000) is larger than (1<<31), int to unsigned long conversion will sign extend the int to 64 bits and the pclk_rate value will be incorrect. Fix this by making the result of the multiplication unsigned. Note that above (1<<32) would still be broken and require more changes, but its unlikely anyone will need that anytime soon. Fixes: c4d8cfe516dc ("drm/msm/dsi: add implementation for helper functions") Signed-off-by: Jonathan Marek Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/618434/ Link: https://lore.kernel.org/r/20241007050157.26855-2-jonathan@marek.ca Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index fcfe612416b0..f920329fe2e0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -545,7 +545,7 @@ static unsigned long dsi_get_pclk_rate(const struct drm_display_mode *mode, { unsigned long pclk_rate; - pclk_rate = mode->clock * 1000; + pclk_rate = mode->clock * 1000u; if (dsc) pclk_rate = dsi_adjust_pclk_for_compression(mode, dsc); From f7ad916273483748582d97cfa31054ccb19224f3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 14 Oct 2024 09:36:08 -0700 Subject: [PATCH 045/207] drm/msm: Avoid NULL dereference in msm_disp_state_print_regs() [ Upstream commit 293f53263266bc4340d777268ab4328a97f041fa ] If the allocation in msm_disp_state_dump_regs() failed then `block->state` can be NULL. The msm_disp_state_print_regs() function _does_ have code to try to handle it with: if (*reg) dump_addr = *reg; ...but since "dump_addr" is initialized to NULL the above is actually a noop. The code then goes on to dereference `dump_addr`. Make the function print "Registers not stored" when it sees a NULL to solve this. Since we're touching the code, fix msm_disp_state_print_regs() not to pointlessly take a double-pointer and properly mark the pointer as `const`. Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot") Signed-off-by: Douglas Anderson Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/619657/ Link: https://lore.kernel.org/r/20241014093605.1.Ia1217cecec9ef09eb3c6d125360cc6c8574b0e73@changeid Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index add72bbc28b1..bb149281d31f 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -48,20 +48,21 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b } } -static void msm_disp_state_print_regs(u32 **reg, u32 len, void __iomem *base_addr, - struct drm_printer *p) +static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, + void __iomem *base_addr, struct drm_printer *p) { int i; - u32 *dump_addr = NULL; void __iomem *addr; u32 num_rows; + if (!dump_addr) { + drm_printf(p, "Registers not stored\n"); + return; + } + addr = base_addr; num_rows = len / REG_DUMP_ALIGN; - if (*reg) - dump_addr = *reg; - for (i = 0; i < num_rows; i++) { drm_printf(p, "0x%lx : %08x %08x %08x %08x\n", (unsigned long)(addr - base_addr), @@ -89,7 +90,7 @@ void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p) list_for_each_entry_safe(block, tmp, &state->blocks, node) { drm_printf(p, "====================%s================\n", block->name); - msm_disp_state_print_regs(&block->state, block->size, block->base_addr, p); + msm_disp_state_print_regs(block->state, block->size, block->base_addr, p); } drm_printf(p, "===================dpu drm state================\n"); From d4e44b8a6a188eb159a95fe61b2672596fdbcfac Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 14 Oct 2024 09:36:09 -0700 Subject: [PATCH 046/207] drm/msm: Allocate memory for disp snapshot with kvzalloc() [ Upstream commit e4a45582db1b792c57bdb52c45958264f7fcfbdc ] With the "drm/msm: add a display mmu fault handler" series [1] we saw issues in the field where memory allocation was failing when allocating space for registers in msm_disp_state_dump_regs(). Specifically we were seeing an order 5 allocation fail. It's not surprising that order 5 allocations will sometimes fail after the system has been up and running for a while. There's no need here for contiguous memory. Change the allocation to kvzalloc() which should make it much less likely to fail. [1] https://lore.kernel.org/r/20240628214848.4075651-1-quic_abhinavk@quicinc.com/ Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot") Signed-off-by: Douglas Anderson Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/619658/ Link: https://lore.kernel.org/r/20241014093605.2.I72441365ffe91f3dceb17db0a8ec976af8139590@changeid Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index bb149281d31f..4d55e3cf570f 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -26,7 +26,7 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b end_addr = base_addr + aligned_len; if (!(*reg)) - *reg = kzalloc(len_padded, GFP_KERNEL); + *reg = kvzalloc(len_padded, GFP_KERNEL); if (*reg) dump_addr = *reg; @@ -162,7 +162,7 @@ void msm_disp_state_free(void *data) list_for_each_entry_safe(block, tmp, &disp_state->blocks, node) { list_del(&block->node); - kfree(block->state); + kvfree(block->state); kfree(block); } From 95e3da26819da1f4297a17759b7359b68b542320 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 14 Oct 2024 09:07:17 -0700 Subject: [PATCH 047/207] firmware: arm_scmi: Queue in scmi layer for mailbox implementation [ Upstream commit da1642bc97c4ef67f347edcd493bd0a52f88777b ] send_message() does not block in the MBOX implementation. This is because the mailbox layer has its own queue. However, this confuses the per xfer timeouts as they all start their timeout ticks in parallel. Consider a case where the xfer timeout is 30ms and a SCMI transaction takes 25ms: | 0ms: Message #0 is queued in mailbox layer and sent out, then sits | at scmi_wait_for_message_response() with a timeout of 30ms | 1ms: Message #1 is queued in mailbox layer but not sent out yet. | Since send_message() doesn't block, it also sits at | scmi_wait_for_message_response() with a timeout of 30ms | ... | 25ms: Message #0 is completed, txdone is called and message #1 is sent | 31ms: Message #1 times out since the count started at 1ms. Even though | it has only been inflight for 6ms. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Signed-off-by: Justin Chen Message-Id: <20241014160717.1678953-1-justin.chen@broadcom.com> Reviewed-by: Cristian Marussi Tested-by: Cristian Marussi Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/mailbox.c | 32 +++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index b8d470417e8f..8e513f70b75d 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -23,6 +23,7 @@ * @chan_receiver: Optional Receiver mailbox unidirectional channel * @cinfo: SCMI channel info * @shmem: Transmit/Receive shared memory area + * @chan_lock: Lock that prevents multiple xfers from being queued */ struct scmi_mailbox { struct mbox_client cl; @@ -30,6 +31,7 @@ struct scmi_mailbox { struct mbox_chan *chan_receiver; struct scmi_chan_info *cinfo; struct scmi_shared_mem __iomem *shmem; + struct mutex chan_lock; }; #define client_to_scmi_mailbox(c) container_of(c, struct scmi_mailbox, cl) @@ -228,6 +230,7 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, cinfo->transport_info = smbox; smbox->cinfo = cinfo; + mutex_init(&smbox->chan_lock); return 0; } @@ -255,13 +258,23 @@ static int mailbox_send_message(struct scmi_chan_info *cinfo, struct scmi_mailbox *smbox = cinfo->transport_info; int ret; + /* + * The mailbox layer has its own queue. However the mailbox queue + * confuses the per message SCMI timeouts since the clock starts when + * the message is submitted into the mailbox queue. So when multiple + * messages are queued up the clock starts on all messages instead of + * only the one inflight. + */ + mutex_lock(&smbox->chan_lock); + ret = mbox_send_message(smbox->chan, xfer); + /* mbox_send_message returns non-negative value on success */ + if (ret < 0) { + mutex_unlock(&smbox->chan_lock); + return ret; + } - /* mbox_send_message returns non-negative value on success, so reset */ - if (ret > 0) - ret = 0; - - return ret; + return 0; } static void mailbox_mark_txdone(struct scmi_chan_info *cinfo, int ret, @@ -269,13 +282,10 @@ static void mailbox_mark_txdone(struct scmi_chan_info *cinfo, int ret, { struct scmi_mailbox *smbox = cinfo->transport_info; - /* - * NOTE: we might prefer not to need the mailbox ticker to manage the - * transfer queueing since the protocol layer queues things by itself. - * Unfortunately, we have to kick the mailbox framework after we have - * received our message. - */ mbox_client_txdone(smbox->chan, ret); + + /* Release channel */ + mutex_unlock(&smbox->chan_lock); } static void mailbox_fetch_response(struct scmi_chan_info *cinfo, From a17874a3a5ab418fef90e35ae9fbf4c85cd3371d Mon Sep 17 00:00:00 2001 From: Kai Shen Date: Thu, 10 Oct 2024 11:56:24 +0000 Subject: [PATCH 048/207] net/smc: Fix memory leak when using percpu refs [ Upstream commit 25c12b459db8365fee84b63f3dd7910f70627f29 ] This patch adds missing percpu_ref_exit when releasing percpu refs. When releasing percpu refs, percpu_ref_exit should be called. Otherwise, memory leak happens. Fixes: 79a22238b4f2 ("net/smc: Use percpu ref for wr tx reference") Signed-off-by: Kai Shen Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/20241010115624.7769-1-KaiShen@linux.alibaba.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/smc/smc_wr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 0021065a600a..994c0cd4fddb 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -648,8 +648,10 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_tx_wait_no_pending_sends(lnk); percpu_ref_kill(&lnk->wr_reg_refs); wait_for_completion(&lnk->reg_ref_comp); + percpu_ref_exit(&lnk->wr_reg_refs); percpu_ref_kill(&lnk->wr_tx_refs); wait_for_completion(&lnk->tx_ref_comp); + percpu_ref_exit(&lnk->wr_tx_refs); if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, @@ -912,11 +914,13 @@ int smc_wr_create_link(struct smc_link *lnk) init_waitqueue_head(&lnk->wr_reg_wait); rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL); if (rc) - goto dma_unmap; + goto cancel_ref; init_completion(&lnk->reg_ref_comp); init_waitqueue_head(&lnk->wr_rx_empty_wait); return rc; +cancel_ref: + percpu_ref_exit(&lnk->wr_tx_refs); dma_unmap: if (lnk->wr_rx_v2_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, From bc52115cc41a611651bb88f20dd90100d321e98d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 10 Oct 2024 15:19:14 +0200 Subject: [PATCH 049/207] net: usb: usbnet: fix race in probe failure [ Upstream commit b62f4c186c70aa235fef2da68d07325d85ca3ade ] The same bug as in the disconnect code path also exists in the case of a failure late during the probe process. The flag must also be set. Signed-off-by: Oliver Neukum Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://patch.msgid.link/20241010131934.1499695-1-oneukum@suse.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/usbnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 60c58dd6d253..4f5a3a4aac89 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1874,6 +1874,7 @@ out1: * may trigger an error resubmitting itself and, worse, * schedule a timer. So we kill it all just in case. */ + usbnet_mark_going_away(dev); cancel_work_sync(&dev->kevent); del_timer_sync(&dev->delay); free_percpu(net->tstats); From e61fa0d66f2253c52cbaa03940b143428a57f02f Mon Sep 17 00:00:00 2001 From: Paritosh Dixit Date: Thu, 10 Oct 2024 10:29:08 -0400 Subject: [PATCH 050/207] net: stmmac: dwmac-tegra: Fix link bring-up sequence [ Upstream commit 1cff6ff302f5703a627f9ee1d99131161ea2683e ] The Tegra MGBE driver sometimes fails to initialize, reporting the following error, and as a result, it is unable to acquire an IP address with DHCP: tegra-mgbe 6800000.ethernet: timeout waiting for link to become ready As per the recommendation from the Tegra hardware design team, fix this issue by: - clearing the PHY_RDY bit before setting the CDR_RESET bit and then setting PHY_RDY bit before clearing CDR_RESET bit. This ensures valid data is present at UPHY RX inputs before starting the CDR lock. - adding the required delays when bringing up the UPHY lane. Note we need to use delays here because there is no alternative, such as polling, for these cases. Using the usleep_range() instead of ndelay() as sleeping is preferred over busy wait loop. Without this change we would see link failures on boot sometimes as often as 1 in 5 boots. With this fix we have not observed any failures in over 1000 boots. Fixes: d8ca113724e7 ("net: stmmac: tegra: Add MGBE support") Signed-off-by: Paritosh Dixit Link: https://patch.msgid.link/20241010142908.602712-1-paritoshd@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/dwmac-tegra.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index e0f3cbd36852..e2d61a3a7712 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -127,10 +127,12 @@ static int mgbe_uphy_lane_bringup_serdes_up(struct net_device *ndev, void *mgbe_ value &= ~XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ; writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + usleep_range(10, 20); /* 50ns min delay needed as per HW design */ value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP; writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + usleep_range(10, 20); /* 500ns min delay needed as per HW design */ value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN; writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); @@ -143,22 +145,30 @@ static int mgbe_uphy_lane_bringup_serdes_up(struct net_device *ndev, void *mgbe_ return err; } + usleep_range(10, 20); /* 50ns min delay needed as per HW design */ value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN; writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_PCS_PHY_RDY; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + usleep_range(10, 20); /* 50ns min delay needed as per HW design */ value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET; writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); - value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); - value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET; - writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); - + usleep_range(10, 20); /* 50ns min delay needed as per HW design */ value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_PCS_PHY_RDY; writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + msleep(30); /* 30ms delay needed as per HW design */ + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_IRQ_STATUS, value, value & XPCS_WRAP_IRQ_STATUS_PCS_LINK_STS, 500, 500 * 2000); From a4bc03078eb31637a0d690cbfdec21da4ab0726b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Oct 2024 16:45:19 +0100 Subject: [PATCH 051/207] octeontx2-af: Fix potential integer overflows on integer shifts [ Upstream commit 637c4f6fe40befa04f19c38b5d15429cbb9191d9 ] The left shift int 32 bit integer constants 1 is evaluated using 32 bit arithmetic and then assigned to a 64 bit unsigned integer. In the case where the shift is 32 or more this can lead to an overflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: 019aba04f08c ("octeontx2-af: Modify SMQ flush sequence to drop packets") Signed-off-by: Colin Ian King Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20241010154519.768785-1-colin.i.king@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 224a025283ca..29487518ca67 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2298,7 +2298,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link)); if (!(cfg & BIT_ULL(12))) continue; - bmap |= (1 << i); + bmap |= BIT_ULL(i); cfg &= ~BIT_ULL(12); rvu_write64(rvu, blkaddr, NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg); @@ -2319,7 +2319,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, /* Set NIX_AF_TL3_TL2_LINKX_CFG[ENA] for the TL3/TL2 queue */ for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) { - if (!(bmap & (1 << i))) + if (!(bmap & BIT_ULL(i))) continue; cfg = rvu_read64(rvu, blkaddr, NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link)); From 65ca9f8ddc0d1e66770cd5c4a7555a7169dac0d8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 8 Oct 2024 19:01:48 +0530 Subject: [PATCH 052/207] drm/amd/amdgpu: Fix double unlock in amdgpu_mes_add_ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7457532cb7167516263150ceae86f36d6ef9683 ] This patch addresses a double unlock issue in the amdgpu_mes_add_ring function. The mutex was being unlocked twice under certain error conditions, which could lead to undefined behavior. The fix ensures that the mutex is unlocked only once before jumping to the clean_up_memory label. The unlock operation is moved to just before the goto statement within the conditional block that checks the return value of amdgpu_ring_init. This prevents the second unlock attempt after the clean_up_memory label, which is no longer necessary as the mutex is already unlocked by this point in the code flow. This change resolves the potential double unlock and maintains the correct mutex handling throughout the function. Fixes below: Commit d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue submission"), leads to the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring() warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213) drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 1144 int queue_type, int idx, 1145 struct amdgpu_mes_ctx_data *ctx_data, 1146 struct amdgpu_ring **out) 1147 { 1148 struct amdgpu_ring *ring; 1149 struct amdgpu_mes_gang *gang; 1150 struct amdgpu_mes_queue_properties qprops = {0}; 1151 int r, queue_id, pasid; 1152 1153 /* 1154 * Avoid taking any other locks under MES lock to avoid circular 1155 * lock dependencies. 1156 */ 1157 amdgpu_mes_lock(&adev->mes); 1158 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 1159 if (!gang) { 1160 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 1161 amdgpu_mes_unlock(&adev->mes); 1162 return -EINVAL; 1163 } 1164 pasid = gang->process->pasid; 1165 1166 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 1167 if (!ring) { 1168 amdgpu_mes_unlock(&adev->mes); 1169 return -ENOMEM; 1170 } 1171 1172 ring->ring_obj = NULL; 1173 ring->use_doorbell = true; 1174 ring->is_mes_queue = true; 1175 ring->mes_ctx = ctx_data; 1176 ring->idx = idx; 1177 ring->no_scheduler = true; 1178 1179 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 1180 int offset = offsetof(struct amdgpu_mes_ctx_meta_data, 1181 compute[ring->idx].mec_hpd); 1182 ring->eop_gpu_addr = 1183 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1184 } 1185 1186 switch (queue_type) { 1187 case AMDGPU_RING_TYPE_GFX: 1188 ring->funcs = adev->gfx.gfx_ring[0].funcs; 1189 ring->me = adev->gfx.gfx_ring[0].me; 1190 ring->pipe = adev->gfx.gfx_ring[0].pipe; 1191 break; 1192 case AMDGPU_RING_TYPE_COMPUTE: 1193 ring->funcs = adev->gfx.compute_ring[0].funcs; 1194 ring->me = adev->gfx.compute_ring[0].me; 1195 ring->pipe = adev->gfx.compute_ring[0].pipe; 1196 break; 1197 case AMDGPU_RING_TYPE_SDMA: 1198 ring->funcs = adev->sdma.instance[0].ring.funcs; 1199 break; 1200 default: 1201 BUG(); 1202 } 1203 1204 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1205 AMDGPU_RING_PRIO_DEFAULT, NULL); 1206 if (r) 1207 goto clean_up_memory; 1208 1209 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); 1210 1211 dma_fence_wait(gang->process->vm->last_update, false); 1212 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 1213 amdgpu_mes_unlock(&adev->mes); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1214 1215 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 1216 if (r) 1217 goto clean_up_ring; ^^^^^^^^^^^^^^^^^^ 1218 1219 ring->hw_queue_id = queue_id; 1220 ring->doorbell_index = qprops.doorbell_off; 1221 1222 if (queue_type == AMDGPU_RING_TYPE_GFX) 1223 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); 1224 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 1225 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, 1226 queue_id); 1227 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 1228 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, 1229 queue_id); 1230 else 1231 BUG(); 1232 1233 *out = ring; 1234 return 0; 1235 1236 clean_up_ring: 1237 amdgpu_ring_fini(ring); 1238 clean_up_memory: 1239 kfree(ring); --> 1240 amdgpu_mes_unlock(&adev->mes); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1241 return r; 1242 } Fixes: d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue submission") Cc: Christian König Cc: Alex Deucher Cc: Hawking Zhang Suggested-by: Jack Xiao Reported by: Dan Carpenter Signed-off-by: Srinivasan Shanmugam Reviewed-by: Jack Xiao Signed-off-by: Alex Deucher (cherry picked from commit bfaf1883605fd0c0dbabacd67ed49708470d5ea4) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index c5c55e132af2..5e3abdd0805b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1053,8 +1053,10 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) + if (r) { + amdgpu_mes_unlock(&adev->mes); goto clean_up_memory; + } amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); @@ -1087,7 +1089,6 @@ clean_up_ring: amdgpu_ring_fini(ring); clean_up_memory: kfree(ring); - amdgpu_mes_unlock(&adev->mes); return r; } From 268446b40bb144f777ae962b5d9e053b2eeab5ad Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 11 Oct 2024 17:16:37 +0200 Subject: [PATCH 053/207] macsec: don't increment counters for an unrelated SA [ Upstream commit cf58aefb1332db322060cad4a330d5f9292b0f41 ] On RX, we shouldn't be incrementing the stats for an arbitrary SA in case the actual SA hasn't been set up. Those counters are intended to track packets for their respective AN when the SA isn't currently configured. Due to the way MACsec is implemented, we don't keep counters unless the SA is configured, so we can't track those packets, and those counters will remain at 0. The RXSC's stats keeps track of those packets without telling us which AN they belonged to. We could add counters for non-existent SAs, and then find a way to integrate them in the dump to userspace, but I don't think it's worth the effort. Fixes: 91ec9bd57f35 ("macsec: Fix traffic counters/statistics") Reported-by: Paolo Abeni Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/f5ac92aaa5b89343232615f4c03f9f95042c6aa0.1728657709.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 778fb77c5a93..2ada8baf815b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -151,19 +151,6 @@ static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr) return sa; } -static struct macsec_rx_sa *macsec_active_rxsa_get(struct macsec_rx_sc *rx_sc) -{ - struct macsec_rx_sa *sa = NULL; - int an; - - for (an = 0; an < MACSEC_NUM_AN; an++) { - sa = macsec_rxsa_get(rx_sc->sa[an]); - if (sa) - break; - } - return sa; -} - static void free_rx_sc_rcu(struct rcu_head *head) { struct macsec_rx_sc *rx_sc = container_of(head, struct macsec_rx_sc, rcu_head); @@ -1205,15 +1192,12 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ - struct macsec_rx_sa *active_rx_sa = macsec_active_rxsa_get(rx_sc); if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotUsingSA++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); - if (active_rx_sa) - this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA); goto drop_nosa; } @@ -1223,8 +1207,6 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsUnusedSA++; u64_stats_update_end(&rxsc_stats->syncp); - if (active_rx_sa) - this_cpu_inc(active_rx_sa->stats->InPktsUnusedSA); goto deliver; } From 681ce79ab6fba2f8d1c5ea60239f0086baebd0d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 12 Oct 2024 09:42:30 +0000 Subject: [PATCH 054/207] netdevsim: use cond_resched() in nsim_dev_trap_report_work() [ Upstream commit a1494d532e28598bde7a5544892ef9c7dbfafa93 ] I am still seeing many syzbot reports hinting that syzbot might fool nsim_dev_trap_report_work() with hundreds of ports [1] Lets use cond_resched(), and system_unbound_wq instead of implicit system_wq. [1] INFO: task syz-executor:20633 blocked for more than 143 seconds. Not tainted 6.12.0-rc2-syzkaller-00205-g1d227fcc7222 #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor state:D stack:25856 pid:20633 tgid:20633 ppid:1 flags:0x00004006 ... NMI backtrace for cpu 1 CPU: 1 UID: 0 PID: 16760 Comm: kworker/1:0 Not tainted 6.12.0-rc2-syzkaller-00205-g1d227fcc7222 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: events nsim_dev_trap_report_work RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x70 kernel/kcov.c:210 Code: 89 fb e8 23 00 00 00 48 8b 3d 04 fb 9c 0c 48 89 de 5b e9 c3 c7 5d 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1e fa 48 8b 04 24 65 48 8b 0c 25 c0 d7 03 00 65 8b 15 60 f0 RSP: 0018:ffffc90000a187e8 EFLAGS: 00000246 RAX: 0000000000000100 RBX: ffffc90000a188e0 RCX: ffff888027d3bc00 RDX: ffff888027d3bc00 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88804a2e6000 R08: ffffffff8a4bc495 R09: ffffffff89da3577 R10: 0000000000000004 R11: ffffffff8a4bc2b0 R12: dffffc0000000000 R13: ffff88806573b503 R14: dffffc0000000000 R15: ffff8880663cca00 FS: 0000000000000000(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc90a747f98 CR3: 000000000e734000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 000000000000002b DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382 spin_unlock_bh include/linux/spinlock.h:396 [inline] nsim_dev_trap_report drivers/net/netdevsim/dev.c:820 [inline] nsim_dev_trap_report_work+0x75d/0xaa0 drivers/net/netdevsim/dev.c:850 process_one_work kernel/workqueue.c:3229 [inline] process_scheduled_works+0xa63/0x1850 kernel/workqueue.c:3310 worker_thread+0x870/0xd30 kernel/workqueue.c:3391 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: ba5e1272142d ("netdevsim: avoid potential loop in nsim_dev_trap_report_work()") Reported-by: syzbot+d383dc9579a76f56c251@syzkaller.appspotmail.com Reported-by: syzbot+c596faae21a68bf7afd0@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Jiri Pirko Link: https://patch.msgid.link/20241012094230.3893510-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/netdevsim/dev.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 92a7a36b93ac..3e0b61202f0c 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -836,7 +836,8 @@ static void nsim_dev_trap_report_work(struct work_struct *work) nsim_dev = nsim_trap_data->nsim_dev; if (!devl_trylock(priv_to_devlink(nsim_dev))) { - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1); + queue_delayed_work(system_unbound_wq, + &nsim_dev->trap_data->trap_report_dw, 1); return; } @@ -848,11 +849,12 @@ static void nsim_dev_trap_report_work(struct work_struct *work) continue; nsim_dev_trap_report(nsim_dev_port); + cond_resched(); } devl_unlock(priv_to_devlink(nsim_dev)); - - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, - msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); + queue_delayed_work(system_unbound_wq, + &nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); } static int nsim_dev_traps_init(struct devlink *devlink) @@ -907,8 +909,9 @@ static int nsim_dev_traps_init(struct devlink *devlink) INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw, nsim_dev_trap_report_work); - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, - msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); + queue_delayed_work(system_unbound_wq, + &nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); return 0; From 79571c4d2f65c4e782977f7857e63b0082cf2fbb Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sat, 12 Oct 2024 19:04:34 +0800 Subject: [PATCH 055/207] net: ethernet: aeroflex: fix potential memory leak in greth_start_xmit_gbit() [ Upstream commit cf57b5d7a2aad456719152ecd12007fe031628a3 ] The greth_start_xmit_gbit() returns NETDEV_TX_OK without freeing skb in case of skb->len being too long, add dev_kfree_skb() to fix it. Fixes: d4c41139df6e ("net: Add Aeroflex Gaisler 10/100/1G Ethernet MAC driver") Signed-off-by: Wang Hai Reviewed-by: Gerhard Engleder Link: https://patch.msgid.link/20241012110434.49265-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/aeroflex/greth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 597a02c75d52..e624d31d20d8 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -484,7 +484,7 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->len > MAX_FRAME_SIZE)) { dev->stats.tx_errors++; - goto out; + goto len_error; } /* Save skb pointer. */ @@ -575,6 +575,7 @@ frag_map_error: map_error: if (net_ratelimit()) dev_warn(greth->dev, "Could not create TX DMA mapping\n"); +len_error: dev_kfree_skb(skb); out: return err; From 0017189d60577896ccffd8891fd14f7847f7f27f Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 14 Oct 2024 19:53:21 +0800 Subject: [PATCH 056/207] net/smc: Fix searching in list of known pnetids in smc_pnet_add_pnetid [ Upstream commit 82ac39ebd6db0c9f7a97a934bda1e3e101a9d201 ] pnetid of pi (not newly allocated pe) should be compared Fixes: e888a2e8337c ("net/smc: introduce list of pnetids for Ethernet devices") Reviewed-by: D. Wythe Reviewed-by: Wen Gu Signed-off-by: Li RongQing Reviewed-by: Simon Horman Reviewed-by: Gerd Bayer Link: https://patch.msgid.link/20241014115321.33234-1-lirongqing@baidu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/smc/smc_pnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 306b536fa89e..284cec1e20ec 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -753,7 +753,7 @@ static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid) write_lock(&sn->pnetids_ndev.lock); list_for_each_entry(pi, &sn->pnetids_ndev.list, list) { - if (smc_pnet_match(pnetid, pe->pnetid)) { + if (smc_pnet_match(pnetid, pi->pnetid)) { refcount_inc(&pi->refcnt); kfree(pe); goto unlock; From b01fbbf385849231e62ce18291eaaf74f1f30f75 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 14 Oct 2024 22:37:04 +0800 Subject: [PATCH 057/207] net: xilinx: axienet: fix potential memory leak in axienet_start_xmit() [ Upstream commit 99714e37e8333bbc22496fe80f241d5b35380e83 ] The axienet_start_xmit() returns NETDEV_TX_OK without freeing skb in case of dma_map_single() fails, add dev_kfree_skb_any() to fix it. Fixes: 71791dc8bdea ("net: axienet: Check for DMA mapping errors") Signed-off-by: Wang Hai Reviewed-by: Radhey Shyam Pandey Link: https://patch.msgid.link/20241014143704.31938-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 62c10eb4f0ad..9f779653ed62 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -845,6 +845,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (net_ratelimit()) netdev_err(ndev, "TX DMA mapping error\n"); ndev->stats.tx_dropped++; + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } desc_set_phys_addr(lp, phys, cur_p); @@ -865,6 +866,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) ndev->stats.tx_dropped++; axienet_free_tx_chain(lp, orig_tail_ptr, ii + 1, true, NULL, 0); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } desc_set_phys_addr(lp, phys, cur_p); From 08b8f206de4ceadf5b6dac76008701e69acf21f1 Mon Sep 17 00:00:00 2001 From: Dimitar Kanaliev Date: Mon, 14 Oct 2024 15:11:53 +0300 Subject: [PATCH 058/207] bpf: Fix truncation bug in coerce_reg_to_size_sx() [ Upstream commit ae67b9fb8c4e981e929a665dcaa070f4b05ebdb4 ] coerce_reg_to_size_sx() updates the register state after a sign-extension operation. However, there's a bug in the assignment order of the unsigned min/max values, leading to incorrect truncation: 0: (85) call bpf_get_prandom_u32#7 ; R0_w=scalar() 1: (57) r0 &= 1 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1)) 2: (07) r0 += 254 ; R0_w=scalar(smin=umin=smin32=umin32=254,smax=umax=smax32=umax32=255,var_off=(0xfe; 0x1)) 3: (bf) r0 = (s8)r0 ; R0_w=scalar(smin=smin32=-2,smax=smax32=-1,umin=umin32=0xfffffffe,umax=0xffffffff,var_off=(0xfffffffffffffffe; 0x1)) In the current implementation, the unsigned 32-bit min/max values (u32_min_value and u32_max_value) are assigned directly from the 64-bit signed min/max values (s64_min and s64_max): reg->umin_value = reg->u32_min_value = s64_min; reg->umax_value = reg->u32_max_value = s64_max; Due to the chain assigmnent, this is equivalent to: reg->u32_min_value = s64_min; // Unintended truncation reg->umin_value = reg->u32_min_value; reg->u32_max_value = s64_max; // Unintended truncation reg->umax_value = reg->u32_max_value; Fixes: 1f9a1ea821ff ("bpf: Support new sign-extension load insns") Reported-by: Shung-Hsi Yu Reported-by: Zac Ecob Signed-off-by: Dimitar Kanaliev Acked-by: Yonghong Song Reviewed-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20241014121155.92887-2-dimitar.kanaliev@siteground.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d1050479cbb3..28b09ca5525f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6143,10 +6143,10 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size) /* both of s64_max/s64_min positive or negative */ if ((s64_max >= 0) == (s64_min >= 0)) { - reg->smin_value = reg->s32_min_value = s64_min; - reg->smax_value = reg->s32_max_value = s64_max; - reg->umin_value = reg->u32_min_value = s64_min; - reg->umax_value = reg->u32_max_value = s64_max; + reg->s32_min_value = reg->smin_value = s64_min; + reg->s32_max_value = reg->smax_value = s64_max; + reg->u32_min_value = reg->umin_value = s64_min; + reg->u32_max_value = reg->umax_value = s64_max; reg->var_off = tnum_range(s64_min, s64_max); return; } From 4b70478b984af3c9d0279c121df5ff94e2533dbd Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 14 Oct 2024 22:51:15 +0800 Subject: [PATCH 059/207] net: systemport: fix potential memory leak in bcm_sysport_xmit() [ Upstream commit c401ed1c709948e57945485088413e1bb5e94bd1 ] The bcm_sysport_xmit() returns NETDEV_TX_OK without freeing skb in case of dma_map_single() fails, add dev_kfree_skb() to fix it. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Wang Hai Link: https://patch.msgid.link/20241014145115.44977-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcmsysport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index bf1611cce974..49e890a7e04a 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1359,6 +1359,7 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, netif_err(priv, tx_err, dev, "DMA map failed at %p (len=%d)\n", skb->data, skb_len); ret = NETDEV_TX_OK; + dev_kfree_skb_any(skb); goto out; } From 05cc5e67dda8d0d9193e65df9713de4ad6420491 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:14 +0200 Subject: [PATCH 060/207] irqchip/renesas-rzg2l: Align struct member names to tabs [ Upstream commit 02f6507640173addeeb3af035d2c6f0b3cff1567 ] Align struct member names to tabs to follow the requirements from maintainer-tip file. 3 tabs were used at the moment as the next commits will add a new member which requires 3 tabs for a better view. Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20231120111820.87398-4-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: d038109ac1c6 ("irqchip/renesas-rzg2l: Fix missing put_device") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index ea4b921e5e15..3ea312a27492 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -56,9 +56,9 @@ #define TINT_EXTRACT_GPIOINT(x) FIELD_GET(GENMASK(31, 16), (x)) struct rzg2l_irqc_priv { - void __iomem *base; - struct irq_fwspec fwspec[IRQC_NUM_IRQ]; - raw_spinlock_t lock; + void __iomem *base; + struct irq_fwspec fwspec[IRQC_NUM_IRQ]; + raw_spinlock_t lock; }; static struct rzg2l_irqc_priv *irq_data_to_priv(struct irq_data *data) From 0776b25d79d86c994cacc5a9eb12f4b197364560 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:15 +0200 Subject: [PATCH 061/207] irqchip/renesas-rzg2l: Document structure members [ Upstream commit b94f455372ad6e6b4da8e8ed9864d9c7daaf54b8 ] Document structure members to follow the requirements specified in maintainer-tip, section 4.3.7. Struct declarations and initializers. Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20231120111820.87398-5-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: d038109ac1c6 ("irqchip/renesas-rzg2l: Fix missing put_device") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 3ea312a27492..ac925da17876 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -55,6 +55,12 @@ #define TINT_EXTRACT_HWIRQ(x) FIELD_GET(GENMASK(15, 0), (x)) #define TINT_EXTRACT_GPIOINT(x) FIELD_GET(GENMASK(31, 16), (x)) +/** + * struct rzg2l_irqc_priv - IRQ controller private data structure + * @base: Controller's base address + * @fwspec: IRQ firmware specific data + * @lock: Lock to serialize access to hardware registers + */ struct rzg2l_irqc_priv { void __iomem *base; struct irq_fwspec fwspec[IRQC_NUM_IRQ]; From 39cb86f19199e0655134c8e5e320e5dab3c10ad8 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Nov 2023 13:18:18 +0200 Subject: [PATCH 062/207] irqchip/renesas-rzg2l: Add support for suspend to RAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 74d2ef5f6f4b2437e6292ab2502400e8048db4aa ] The irqchip-renesas-rzg2l driver is used on RZ/G3S SoC. RZ/G3S can go into deep sleep states where power to different SoC's parts is cut off and RAM is switched to self-refresh. The resume from these states is done with the help of the bootloader. The IA55 IRQ controller needs to be reconfigured when resuming from deep sleep state. For this the IA55 registers are cached in suspend and restored in resume. The IA55 IRQ controller is connected to GPIO controller and GIC as follows: ┌──────────┐ ┌──────────┐ │ │ SPIX │ │ │ ├─────────►│ │ │ │ │ │ │ │ │ │ ┌────────┐IRQ0-7 │ IA55 │ │ GIC │ Pin0 ───────►│ ├─────────────►│ │ │ │ │ │ │ │ PPIY │ │ ... │ GPIO │ │ ├─────────►│ │ │ │GPIOINT0-127 │ │ │ │ PinN ───────►│ ├─────────────►│ │ │ │ └────────┘ └──────────┘ └──────────┘ where: - Pin0 is the first GPIO controller pin - PinN is the last GPIO controller pin - SPIX is the SPI interrupt with identifier X - PPIY is the PPI interrupt with identifier Y Implement suspend/resume functionality with syscore_ops to be able to cache/restore the registers after/before the GPIO controller suspend/resume functions are invoked. As the syscore_ops suspend/resume functions do not take any argument make the driver private data static so it can be accessed from the suspend/resume functions. The IA55 interrupt controller is resumed before the GPIO controller. As GPIO pins could be in an a state which causes spurious interrupts, the reconfiguration of the interrupt controller is restricted to restore the interrupt type and leave them disabled. An eventually required interrupt enable operation will be done as part of the GPIO controller resume function after restoring the GPIO state. [ tglx: Massaged changelog ] Signed-off-by: Claudiu Beznea Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231120111820.87398-8-claudiu.beznea.uj@bp.renesas.com Stable-dep-of: d038109ac1c6 ("irqchip/renesas-rzg2l: Fix missing put_device") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 68 ++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index ac925da17876..00688043697f 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -18,6 +18,7 @@ #include #include #include +#include #define IRQC_IRQ_START 1 #define IRQC_IRQ_COUNT 8 @@ -55,17 +56,29 @@ #define TINT_EXTRACT_HWIRQ(x) FIELD_GET(GENMASK(15, 0), (x)) #define TINT_EXTRACT_GPIOINT(x) FIELD_GET(GENMASK(31, 16), (x)) +/** + * struct rzg2l_irqc_reg_cache - registers cache (necessary for suspend/resume) + * @iitsr: IITSR register + * @titsr: TITSR registers + */ +struct rzg2l_irqc_reg_cache { + u32 iitsr; + u32 titsr[2]; +}; + /** * struct rzg2l_irqc_priv - IRQ controller private data structure * @base: Controller's base address * @fwspec: IRQ firmware specific data * @lock: Lock to serialize access to hardware registers + * @cache: Registers cache for suspend/resume */ -struct rzg2l_irqc_priv { +static struct rzg2l_irqc_priv { void __iomem *base; struct irq_fwspec fwspec[IRQC_NUM_IRQ]; raw_spinlock_t lock; -}; + struct rzg2l_irqc_reg_cache cache; +} *rzg2l_irqc_data; static struct rzg2l_irqc_priv *irq_data_to_priv(struct irq_data *data) { @@ -282,6 +295,38 @@ static int rzg2l_irqc_set_type(struct irq_data *d, unsigned int type) return irq_chip_set_type_parent(d, IRQ_TYPE_LEVEL_HIGH); } +static int rzg2l_irqc_irq_suspend(void) +{ + struct rzg2l_irqc_reg_cache *cache = &rzg2l_irqc_data->cache; + void __iomem *base = rzg2l_irqc_data->base; + + cache->iitsr = readl_relaxed(base + IITSR); + for (u8 i = 0; i < 2; i++) + cache->titsr[i] = readl_relaxed(base + TITSR(i)); + + return 0; +} + +static void rzg2l_irqc_irq_resume(void) +{ + struct rzg2l_irqc_reg_cache *cache = &rzg2l_irqc_data->cache; + void __iomem *base = rzg2l_irqc_data->base; + + /* + * Restore only interrupt type. TSSRx will be restored at the + * request of pin controller to avoid spurious interrupts due + * to invalid PIN states. + */ + for (u8 i = 0; i < 2; i++) + writel_relaxed(cache->titsr[i], base + TITSR(i)); + writel_relaxed(cache->iitsr, base + IITSR); +} + +static struct syscore_ops rzg2l_irqc_syscore_ops = { + .suspend = rzg2l_irqc_irq_suspend, + .resume = rzg2l_irqc_irq_resume, +}; + static const struct irq_chip irqc_chip = { .name = "rzg2l-irqc", .irq_eoi = rzg2l_irqc_eoi, @@ -366,7 +411,6 @@ static int rzg2l_irqc_init(struct device_node *node, struct device_node *parent) struct irq_domain *irq_domain, *parent_domain; struct platform_device *pdev; struct reset_control *resetn; - struct rzg2l_irqc_priv *priv; int ret; pdev = of_find_device_by_node(node); @@ -379,15 +423,15 @@ static int rzg2l_irqc_init(struct device_node *node, struct device_node *parent) return -ENODEV; } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) + rzg2l_irqc_data = devm_kzalloc(&pdev->dev, sizeof(*rzg2l_irqc_data), GFP_KERNEL); + if (!rzg2l_irqc_data) return -ENOMEM; - priv->base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); - if (IS_ERR(priv->base)) - return PTR_ERR(priv->base); + rzg2l_irqc_data->base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); + if (IS_ERR(rzg2l_irqc_data->base)) + return PTR_ERR(rzg2l_irqc_data->base); - ret = rzg2l_irqc_parse_interrupts(priv, node); + ret = rzg2l_irqc_parse_interrupts(rzg2l_irqc_data, node); if (ret) { dev_err(&pdev->dev, "cannot parse interrupts: %d\n", ret); return ret; @@ -410,17 +454,19 @@ static int rzg2l_irqc_init(struct device_node *node, struct device_node *parent) goto pm_disable; } - raw_spin_lock_init(&priv->lock); + raw_spin_lock_init(&rzg2l_irqc_data->lock); irq_domain = irq_domain_add_hierarchy(parent_domain, 0, IRQC_NUM_IRQ, node, &rzg2l_irqc_domain_ops, - priv); + rzg2l_irqc_data); if (!irq_domain) { dev_err(&pdev->dev, "failed to add irq domain\n"); ret = -ENOMEM; goto pm_put; } + register_syscore_ops(&rzg2l_irqc_syscore_ops); + return 0; pm_put: From b18db3a4fa5f69d260a3e495bc6706541e591c88 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Fri, 11 Oct 2024 18:20:03 +0100 Subject: [PATCH 063/207] irqchip/renesas-rzg2l: Fix missing put_device [ Upstream commit d038109ac1c6bf619473dda03a16a6de58170f7f ] rzg2l_irqc_common_init() calls of_find_device_by_node(), but the corresponding put_device() call is missing. This also gets reported by make coccicheck. Make use of the cleanup interfaces from cleanup.h to call into __free_put_device(), which in turn calls into put_device when leaving function rzg2l_irqc_common_init() and variable "dev" goes out of scope. To prevent that the device is put on successful completion, assign NULL to "dev" to prevent __free_put_device() from calling into put_device() within the successful path. "make coccicheck" will still complain about missing put_device() calls, but those are false positives now. Fixes: 3fed09559cd8 ("irqchip: Add RZ/G2L IA55 Interrupt Controller driver") Signed-off-by: Fabrizio Castro Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241011172003.1242841-1-fabrizio.castro.jz@renesas.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-renesas-rzg2l.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 00688043697f..5a7836186fd4 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -408,12 +409,12 @@ static int rzg2l_irqc_parse_interrupts(struct rzg2l_irqc_priv *priv, static int rzg2l_irqc_init(struct device_node *node, struct device_node *parent) { + struct platform_device *pdev = of_find_device_by_node(node); + struct device *dev __free(put_device) = pdev ? &pdev->dev : NULL; struct irq_domain *irq_domain, *parent_domain; - struct platform_device *pdev; struct reset_control *resetn; int ret; - pdev = of_find_device_by_node(node); if (!pdev) return -ENODEV; @@ -467,6 +468,17 @@ static int rzg2l_irqc_init(struct device_node *node, struct device_node *parent) register_syscore_ops(&rzg2l_irqc_syscore_ops); + /* + * Prevent the cleanup function from invoking put_device by assigning + * NULL to dev. + * + * make coccicheck will complain about missing put_device calls, but + * those are false positives, as dev will be automatically "put" via + * __free_put_device on the failing path. + * On the successful path we don't actually want to "put" dev. + */ + dev = NULL; + return 0; pm_put: From 1999a9e050c543eb11059e098b8474c00d52945d Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Wed, 9 Oct 2024 20:46:19 -0700 Subject: [PATCH 064/207] drm/msm/dpu: don't always program merge_3d block [ Upstream commit f87f3b80abaf7949e638dd17dfdc267066eb52d5 ] Only program the merge_3d block for the video phys encoder when the 3d blend mode is not NONE Fixes: 3e79527a33a8 ("drm/msm/dpu: enable merge_3d support on sm8150/sm8250") Suggested-by: Abhinav Kumar Signed-off-by: Jessica Zhang Patchwork: https://patchwork.freedesktop.org/patch/619095/ Link: https://lore.kernel.org/r/20241009-merge3d-fix-v1-1-0d0b6f5c244e@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index daaf0e604753..20c8b9af7a21 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -280,7 +280,7 @@ static void dpu_encoder_phys_vid_setup_timing_engine( intf_cfg.stream_sel = 0; /* Don't care value for video mode */ intf_cfg.mode_3d = dpu_encoder_helper_get_3d_blend_mode(phys_enc); intf_cfg.dsc = dpu_encoder_helper_get_dsc(phys_enc); - if (phys_enc->hw_pp->merge_3d) + if (intf_cfg.mode_3d && phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags); From 7218de0778aefbbbcfe474a55f88bbf6f244627d Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 14 Oct 2024 22:59:01 +0800 Subject: [PATCH 065/207] net: bcmasp: fix potential memory leak in bcmasp_xmit() [ Upstream commit fed07d3eb8a8d9fcc0e455175a89bc6445d6faed ] The bcmasp_xmit() returns NETDEV_TX_OK without freeing skb in case of mapping fails, add dev_kfree_skb() to fix it. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Wang Hai Acked-by: Florian Fainelli Link: https://patch.msgid.link/20241014145901.48940-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 6bf149d64594..f0647286c68b 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -322,6 +322,7 @@ static netdev_tx_t bcmasp_xmit(struct sk_buff *skb, struct net_device *dev) } /* Rewind so we do not have a hole */ spb_index = intf->tx_spb_index; + dev_kfree_skb(skb); return NETDEV_TX_OK; } From 997ae8da14f1639ce6fb66a063dab54031cd61b3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 15:33:12 -0700 Subject: [PATCH 066/207] tcp/dccp: Don't use timer_pending() in reqsk_queue_unlink(). [ Upstream commit e8c526f2bdf1845bedaf6a478816a3d06fa78b8f ] Martin KaFai Lau reported use-after-free [0] in reqsk_timer_handler(). """ We are seeing a use-after-free from a bpf prog attached to trace_tcp_retransmit_synack. The program passes the req->sk to the bpf_sk_storage_get_tracing kernel helper which does check for null before using it. """ The commit 83fccfc3940c ("inet: fix potential deadlock in reqsk_queue_unlink()") added timer_pending() in reqsk_queue_unlink() not to call del_timer_sync() from reqsk_timer_handler(), but it introduced a small race window. Before the timer is called, expire_timers() calls detach_timer(timer, true) to clear timer->entry.pprev and marks it as not pending. If reqsk_queue_unlink() checks timer_pending() just after expire_timers() calls detach_timer(), TCP will miss del_timer_sync(); the reqsk timer will continue running and send multiple SYN+ACKs until it expires. The reported UAF could happen if req->sk is close()d earlier than the timer expiration, which is 63s by default. The scenario would be 1. inet_csk_complete_hashdance() calls inet_csk_reqsk_queue_drop(), but del_timer_sync() is missed 2. reqsk timer is executed and scheduled again 3. req->sk is accept()ed and reqsk_put() decrements rsk_refcnt, but reqsk timer still has another one, and inet_csk_accept() does not clear req->sk for non-TFO sockets 4. sk is close()d 5. reqsk timer is executed again, and BPF touches req->sk Let's not use timer_pending() by passing the caller context to __inet_csk_reqsk_queue_drop(). Note that reqsk timer is pinned, so the issue does not happen in most use cases. [1] [0] BUG: KFENCE: use-after-free read in bpf_sk_storage_get_tracing+0x2e/0x1b0 Use-after-free read at 0x00000000a891fb3a (in kfence-#1): bpf_sk_storage_get_tracing+0x2e/0x1b0 bpf_prog_5ea3e95db6da0438_tcp_retransmit_synack+0x1d20/0x1dda bpf_trace_run2+0x4c/0xc0 tcp_rtx_synack+0xf9/0x100 reqsk_timer_handler+0xda/0x3d0 run_timer_softirq+0x292/0x8a0 irq_exit_rcu+0xf5/0x320 sysvec_apic_timer_interrupt+0x6d/0x80 asm_sysvec_apic_timer_interrupt+0x16/0x20 intel_idle_irq+0x5a/0xa0 cpuidle_enter_state+0x94/0x273 cpu_startup_entry+0x15e/0x260 start_secondary+0x8a/0x90 secondary_startup_64_no_verify+0xfa/0xfb kfence-#1: 0x00000000a72cc7b6-0x00000000d97616d9, size=2376, cache=TCPv6 allocated by task 0 on cpu 9 at 260507.901592s: sk_prot_alloc+0x35/0x140 sk_clone_lock+0x1f/0x3f0 inet_csk_clone_lock+0x15/0x160 tcp_create_openreq_child+0x1f/0x410 tcp_v6_syn_recv_sock+0x1da/0x700 tcp_check_req+0x1fb/0x510 tcp_v6_rcv+0x98b/0x1420 ipv6_list_rcv+0x2258/0x26e0 napi_complete_done+0x5b1/0x2990 mlx5e_napi_poll+0x2ae/0x8d0 net_rx_action+0x13e/0x590 irq_exit_rcu+0xf5/0x320 common_interrupt+0x80/0x90 asm_common_interrupt+0x22/0x40 cpuidle_enter_state+0xfb/0x273 cpu_startup_entry+0x15e/0x260 start_secondary+0x8a/0x90 secondary_startup_64_no_verify+0xfa/0xfb freed by task 0 on cpu 9 at 260507.927527s: rcu_core_si+0x4ff/0xf10 irq_exit_rcu+0xf5/0x320 sysvec_apic_timer_interrupt+0x6d/0x80 asm_sysvec_apic_timer_interrupt+0x16/0x20 cpuidle_enter_state+0xfb/0x273 cpu_startup_entry+0x15e/0x260 start_secondary+0x8a/0x90 secondary_startup_64_no_verify+0xfa/0xfb Fixes: 83fccfc3940c ("inet: fix potential deadlock in reqsk_queue_unlink()") Reported-by: Martin KaFai Lau Closes: https://lore.kernel.org/netdev/eb6684d0-ffd9-4bdc-9196-33f690c25824@linux.dev/ Link: https://lore.kernel.org/netdev/b55e2ca0-42f2-4b7c-b445-6ffd87ca74a0@linux.dev/ [1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Martin KaFai Lau Link: https://patch.msgid.link/20241014223312.4254-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8720f3840b69..ca8cc0988b61 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -980,21 +980,31 @@ static bool reqsk_queue_unlink(struct request_sock *req) found = __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); } - if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) - reqsk_put(req); + return found; } -bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +static bool __inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + bool from_timer) { bool unlinked = reqsk_queue_unlink(req); + if (!from_timer && timer_delete_sync(&req->rsk_timer)) + reqsk_put(req); + if (unlinked) { reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); reqsk_put(req); } + return unlinked; } + +bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + return __inet_csk_reqsk_queue_drop(sk, req, false); +} EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) @@ -1087,7 +1097,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ - inet_csk_reqsk_queue_drop(sk_listener, nreq); + __inet_csk_reqsk_queue_drop(sk_listener, nreq, true); goto no_ownership; } @@ -1113,7 +1123,8 @@ no_ownership: } drop: - inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); + __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + reqsk_put(req); } static bool reqsk_queue_hash_req(struct request_sock *req, From f8f8afc101f2910fda3b950ee4513db082ca1428 Mon Sep 17 00:00:00 2001 From: Peter Rashleigh Date: Mon, 14 Oct 2024 13:43:42 -0700 Subject: [PATCH 067/207] net: dsa: mv88e6xxx: Fix the max_vid definition for the MV88E6361 [ Upstream commit 1833d8a26f057128fd63e126b4428203ece84684 ] According to the Marvell datasheet the 88E6361 has two VTU pages (4k VIDs per page) so the max_vid should be 8191, not 4095. In the current implementation mv88e6xxx_vtu_walk() gives unexpected results because of this error. I verified that mv88e6xxx_vtu_walk() works correctly on the MV88E6361 with this patch in place. Fixes: 12899f299803 ("net: dsa: mv88e6xxx: enable support for 88E6361 switch") Signed-off-by: Peter Rashleigh Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241014204342.5852-1-peter@rashleigh.ca Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3877744193e2..062bcbe6255c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -6208,7 +6208,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .invalid_port_mask = BIT(1) | BIT(2) | BIT(8), .num_internal_phys = 5, .internal_phys_offset = 3, - .max_vid = 4095, + .max_vid = 8191, .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, From b94e5375ec66f4fe40ed7591b7fc8a4505c922d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Oct 2024 17:12:17 +0000 Subject: [PATCH 068/207] genetlink: hold RCU in genlmsg_mcast() [ Upstream commit 56440d7ec28d60f8da3bfa09062b3368ff9b16db ] While running net selftests with CONFIG_PROVE_RCU_LIST=y I saw one lockdep splat [1]. genlmsg_mcast() uses for_each_net_rcu(), and must therefore hold RCU. Instead of letting all callers guard genlmsg_multicast_allns() with a rcu_read_lock()/rcu_read_unlock() pair, do it in genlmsg_mcast(). This also means the @flags parameter is useless, we need to always use GFP_ATOMIC. [1] [10882.424136] ============================= [10882.424166] WARNING: suspicious RCU usage [10882.424309] 6.12.0-rc2-virtme #1156 Not tainted [10882.424400] ----------------------------- [10882.424423] net/netlink/genetlink.c:1940 RCU-list traversed in non-reader section!! [10882.424469] other info that might help us debug this: [10882.424500] rcu_scheduler_active = 2, debug_locks = 1 [10882.424744] 2 locks held by ip/15677: [10882.424791] #0: ffffffffb6b491b0 (cb_lock){++++}-{3:3}, at: genl_rcv (net/netlink/genetlink.c:1219) [10882.426334] #1: ffffffffb6b49248 (genl_mutex){+.+.}-{3:3}, at: genl_rcv_msg (net/netlink/genetlink.c:61 net/netlink/genetlink.c:57 net/netlink/genetlink.c:1209) [10882.426465] stack backtrace: [10882.426805] CPU: 14 UID: 0 PID: 15677 Comm: ip Not tainted 6.12.0-rc2-virtme #1156 [10882.426919] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [10882.427046] Call Trace: [10882.427131] [10882.427244] dump_stack_lvl (lib/dump_stack.c:123) [10882.427335] lockdep_rcu_suspicious (kernel/locking/lockdep.c:6822) [10882.427387] genlmsg_multicast_allns (net/netlink/genetlink.c:1940 (discriminator 7) net/netlink/genetlink.c:1977 (discriminator 7)) [10882.427436] l2tp_tunnel_notify.constprop.0 (net/l2tp/l2tp_netlink.c:119) l2tp_netlink [10882.427683] l2tp_nl_cmd_tunnel_create (net/l2tp/l2tp_netlink.c:253) l2tp_netlink [10882.427748] genl_family_rcv_msg_doit (net/netlink/genetlink.c:1115) [10882.427834] genl_rcv_msg (net/netlink/genetlink.c:1195 net/netlink/genetlink.c:1210) [10882.427877] ? __pfx_l2tp_nl_cmd_tunnel_create (net/l2tp/l2tp_netlink.c:186) l2tp_netlink [10882.427927] ? __pfx_genl_rcv_msg (net/netlink/genetlink.c:1201) [10882.427959] netlink_rcv_skb (net/netlink/af_netlink.c:2551) [10882.428069] genl_rcv (net/netlink/genetlink.c:1220) [10882.428095] netlink_unicast (net/netlink/af_netlink.c:1332 net/netlink/af_netlink.c:1357) [10882.428140] netlink_sendmsg (net/netlink/af_netlink.c:1901) [10882.428210] ____sys_sendmsg (net/socket.c:729 (discriminator 1) net/socket.c:744 (discriminator 1) net/socket.c:2607 (discriminator 1)) Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners") Signed-off-by: Eric Dumazet Cc: James Chapman Cc: Tom Parkin Cc: Johannes Berg Link: https://patch.msgid.link/20241011171217.3166614-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/target/target_core_user.c | 2 +- include/net/genetlink.h | 3 +-- net/l2tp/l2tp_netlink.c | 4 ++-- net/netlink/genetlink.c | 28 ++++++++++++++-------------- net/wireless/nl80211.c | 8 ++------ 5 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 22cc6cac0ba2..2e100b76914a 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2130,7 +2130,7 @@ static int tcmu_netlink_event_send(struct tcmu_dev *udev, } ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0, - TCMU_MCGRP_CONFIG, GFP_KERNEL); + TCMU_MCGRP_CONFIG); /* Wait during an add as the listener may not be up yet */ if (ret == 0 || diff --git a/include/net/genetlink.h b/include/net/genetlink.h index c53244f20437..e8c34aa4a640 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -478,13 +478,12 @@ static inline int genlmsg_multicast(const struct genl_family *family, * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array - * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). */ int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, - unsigned int group, gfp_t flags); + unsigned int group); /** * genlmsg_unicast - unicast a netlink message diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index a901fd14fe3b..e27e00cb16c6 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -115,7 +115,7 @@ static int l2tp_tunnel_notify(struct genl_family *family, NLM_F_ACK, tunnel, cmd); if (ret >= 0) { - ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + ret = genlmsg_multicast_allns(family, msg, 0, 0); /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; @@ -143,7 +143,7 @@ static int l2tp_session_notify(struct genl_family *family, NLM_F_ACK, session, cmd); if (ret >= 0) { - ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + ret = genlmsg_multicast_allns(family, msg, 0, 0); /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d41c4a936ad0..d6eee5140c8b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1355,15 +1355,11 @@ static int genl_ctrl_event(int event, const struct genl_family *family, if (IS_ERR(msg)) return PTR_ERR(msg); - if (!family->netnsok) { + if (!family->netnsok) genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0, 0, GFP_KERNEL); - } else { - rcu_read_lock(); - genlmsg_multicast_allns(&genl_ctrl, msg, 0, - 0, GFP_ATOMIC); - rcu_read_unlock(); - } + else + genlmsg_multicast_allns(&genl_ctrl, msg, 0, 0); return 0; } @@ -1752,23 +1748,23 @@ problem: core_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, - gfp_t flags) +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group) { struct sk_buff *tmp; struct net *net, *prev = NULL; bool delivered = false; int err; + rcu_read_lock(); for_each_net_rcu(net) { if (prev) { - tmp = skb_clone(skb, flags); + tmp = skb_clone(skb, GFP_ATOMIC); if (!tmp) { err = -ENOMEM; goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - portid, group, flags); + portid, group, GFP_ATOMIC); if (!err) delivered = true; else if (err != -ESRCH) @@ -1777,27 +1773,31 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, prev = net; } + err = nlmsg_multicast(prev->genl_sock, skb, portid, group, GFP_ATOMIC); + + rcu_read_unlock(); - err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); if (!err) delivered = true; else if (err != -ESRCH) return err; return delivered ? 0 : -ESRCH; error: + rcu_read_unlock(); + kfree_skb(skb); return err; } int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, - unsigned int group, gfp_t flags) + unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; - return genlmsg_mcast(skb, portid, group, flags); + return genlmsg_mcast(skb, portid, group); } EXPORT_SYMBOL(genlmsg_multicast_allns); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9e74f249cb45..797907303669 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -17905,10 +17905,8 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, genlmsg_end(msg, hdr); - rcu_read_lock(); genlmsg_multicast_allns(&nl80211_fam, msg, 0, - NL80211_MCGRP_REGULATORY, GFP_ATOMIC); - rcu_read_unlock(); + NL80211_MCGRP_REGULATORY); return; @@ -18605,10 +18603,8 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, genlmsg_end(msg, hdr); - rcu_read_lock(); genlmsg_multicast_allns(&nl80211_fam, msg, 0, - NL80211_MCGRP_REGULATORY, GFP_ATOMIC); - rcu_read_unlock(); + NL80211_MCGRP_REGULATORY); return; From e9638d3e62dcb628025f58d503b3ed0479c20f81 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 1 Sep 2024 14:27:55 +0300 Subject: [PATCH 069/207] ravb: Remove setting of RX software timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 277901ee3a2620679e2c8797377d2a72f4358068 ] The responsibility for reporting of RX software timestamp has moved to the core layer (see __ethtool_get_ts_info()), remove usage from the device drivers. Reviewed-by: Carolina Jubran Reviewed-by: Rahul Rameshbabu Signed-off-by: Gal Pressman Reviewed-by: Niklas Söderlund Reviewed-by: Sergey Shtylyov Link: https://patch.msgid.link/20240901112803.212753-8-gal@nvidia.com Signed-off-by: Jakub Kicinski Stable-dep-of: 126e799602f4 ("net: ravb: Only advertise Rx/Tx timestamps if hardware supports it") Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c6897e6ea362..8f62cc451791 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1675,8 +1675,6 @@ static int ravb_get_ts_info(struct net_device *ndev, info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -1687,6 +1685,8 @@ static int ravb_get_ts_info(struct net_device *ndev, (1 << HWTSTAMP_FILTER_ALL); if (hw_info->gptp || hw_info->ccc_gac) info->phc_index = ptp_clock_index(priv->ptp.clock); + else + info->phc_index = 0; return 0; } From 2fe1384cbb7c63788444f92721590dfe4c9bcaea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Mon, 14 Oct 2024 14:43:43 +0200 Subject: [PATCH 070/207] net: ravb: Only advertise Rx/Tx timestamps if hardware supports it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 126e799602f45e9ce1ded03ee9eadda68bf470e0 ] Recent work moving the reporting of Rx software timestamps to the core [1] highlighted an issue where hardware time stamping was advertised for the platforms where it is not supported. Fix this by covering advertising support for hardware timestamps only if the hardware supports it. Due to the Tx implementation in RAVB software Tx timestamping is also only considered if the hardware supports hardware timestamps. This should be addressed in future, but this fix only reflects what the driver currently implements. 1. Commit 277901ee3a26 ("ravb: Remove setting of RX software timestamp") Fixes: 7e09a052dc4e ("ravb: Exclude gPTP feature support for RZ/G2L") Signed-off-by: Niklas Söderlund Reviewed-by: Paul Barker Tested-by: Paul Barker Reviewed-by: Sergey Shtylyov Link: https://patch.msgid.link/20241014124343.3875285-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 25 ++++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8f62cc451791..58fdc4f8dd48 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1673,20 +1673,19 @@ static int ravb_get_ts_info(struct net_device *ndev, struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *hw_info = priv->info; - info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; - info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); - info->rx_filters = - (1 << HWTSTAMP_FILTER_NONE) | - (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | - (1 << HWTSTAMP_FILTER_ALL); - if (hw_info->gptp || hw_info->ccc_gac) + if (hw_info->gptp || hw_info->ccc_gac) { + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + (1 << HWTSTAMP_FILTER_ALL); info->phc_index = ptp_clock_index(priv->ptp.clock); - else - info->phc_index = 0; + } return 0; } From b80e9bc85bd9af378e7eac83e15dd129557bbdb6 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 11 Oct 2024 19:34:44 +0800 Subject: [PATCH 071/207] scsi: target: core: Fix null-ptr-deref in target_alloc_device() [ Upstream commit fca6caeb4a61d240f031914413fcc69534f6dc03 ] There is a null-ptr-deref issue reported by KASAN: BUG: KASAN: null-ptr-deref in target_alloc_device+0xbc4/0xbe0 [target_core_mod] ... kasan_report+0xb9/0xf0 target_alloc_device+0xbc4/0xbe0 [target_core_mod] core_dev_setup_virtual_lun0+0xef/0x1f0 [target_core_mod] target_core_init_configfs+0x205/0x420 [target_core_mod] do_one_initcall+0xdd/0x4e0 ... entry_SYSCALL_64_after_hwframe+0x76/0x7e In target_alloc_device(), if allocing memory for dev queues fails, then dev will be freed by dev->transport->free_device(), but dev->transport is not initialized at that time, which will lead to a null pointer reference problem. Fixing this bug by freeing dev with hba->backend->ops->free_device(). Fixes: 1526d9f10c61 ("scsi: target: Make state_list per CPU") Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20241011113444.40749-1-wanghai38@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 86590a7e29f6..dd041ee18ac9 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -692,7 +692,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->queues = kcalloc(nr_cpu_ids, sizeof(*dev->queues), GFP_KERNEL); if (!dev->queues) { - dev->transport->free_device(dev); + hba->backend->ops->free_device(dev); return NULL; } From b1813c220b76f60b1727984794377c4aa849d4c1 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Tue, 15 Oct 2024 18:20:37 +0800 Subject: [PATCH 072/207] smb: client: fix possible double free in smb2_set_ea() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 19ebc1e6cab334a8193398d4152deb76019b5d34 ] Clang static checker(scan-build) warning: fs/smb/client/smb2ops.c:1304:2: Attempt to free released memory. 1304 | kfree(ea); | ^~~~~~~~~ There is a double free in such case: 'ea is initialized to NULL' -> 'first successful memory allocation for ea' -> 'something failed, goto sea_exit' -> 'first memory release for ea' -> 'goto replay_again' -> 'second goto sea_exit before allocate memory for ea' -> 'second memory release for ea resulted in double free'. Re-initialie 'ea' to NULL near to the replay_again label, it can fix this double free problem. Fixes: 4f1fffa23769 ("cifs: commands that are retried should have replay flag set") Reviewed-by: Dan Carpenter Signed-off-by: Su Hui Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 450e3050324c..ab6e79be2c15 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1122,7 +1122,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; unsigned int size[1]; void *data[1]; - struct smb2_file_full_ea_info *ea = NULL; + struct smb2_file_full_ea_info *ea; struct smb2_query_info_rsp *rsp; int rc, used_len = 0; int retries = 0, cur_sleep = 1; @@ -1143,6 +1143,7 @@ replay_again: if (!utf16_path) return -ENOMEM; + ea = NULL; resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; vars = kzalloc(sizeof(*vars), GFP_KERNEL); if (!vars) { From b209c3a0bc3ac172265c7fa8309e5d00654f2510 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 15 Oct 2024 19:04:04 -0300 Subject: [PATCH 073/207] smb: client: fix OOBs when building SMB2_IOCTL request [ Upstream commit 1ab60323c5201bef25f2a3dc0ccc404d9aca77f1 ] When using encryption, either enforced by the server or when using 'seal' mount option, the client will squash all compound request buffers down for encryption into a single iov in smb2_set_next_command(). SMB2_ioctl_init() allocates a small buffer (448 bytes) to hold the SMB2_IOCTL request in the first iov, and if the user passes an input buffer that is greater than 328 bytes, smb2_set_next_command() will end up writing off the end of @rqst->iov[0].iov_base as shown below: mount.cifs //srv/share /mnt -o ...,seal ln -s $(perl -e "print('a')for 1..1024") /mnt/link BUG: KASAN: slab-out-of-bounds in smb2_set_next_command.cold+0x1d6/0x24c [cifs] Write of size 4116 at addr ffff8881148fcab8 by task ln/859 CPU: 1 UID: 0 PID: 859 Comm: ln Not tainted 6.12.0-rc3 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x80 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] print_report+0x156/0x4d9 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] ? __virt_addr_valid+0x145/0x310 ? __phys_addr+0x46/0x90 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] kasan_report+0xda/0x110 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] kasan_check_range+0x10f/0x1f0 __asan_memcpy+0x3c/0x60 smb2_set_next_command.cold+0x1d6/0x24c [cifs] smb2_compound_op+0x238c/0x3840 [cifs] ? kasan_save_track+0x14/0x30 ? kasan_save_free_info+0x3b/0x70 ? vfs_symlink+0x1a1/0x2c0 ? do_symlinkat+0x108/0x1c0 ? __pfx_smb2_compound_op+0x10/0x10 [cifs] ? kmem_cache_free+0x118/0x3e0 ? cifs_get_writable_path+0xeb/0x1a0 [cifs] smb2_get_reparse_inode+0x423/0x540 [cifs] ? __pfx_smb2_get_reparse_inode+0x10/0x10 [cifs] ? rcu_is_watching+0x20/0x50 ? __kmalloc_noprof+0x37c/0x480 ? smb2_create_reparse_symlink+0x257/0x490 [cifs] ? smb2_create_reparse_symlink+0x38f/0x490 [cifs] smb2_create_reparse_symlink+0x38f/0x490 [cifs] ? __pfx_smb2_create_reparse_symlink+0x10/0x10 [cifs] ? find_held_lock+0x8a/0xa0 ? hlock_class+0x32/0xb0 ? __build_path_from_dentry_optional_prefix+0x19d/0x2e0 [cifs] cifs_symlink+0x24f/0x960 [cifs] ? __pfx_make_vfsuid+0x10/0x10 ? __pfx_cifs_symlink+0x10/0x10 [cifs] ? make_vfsgid+0x6b/0xc0 ? generic_permission+0x96/0x2d0 vfs_symlink+0x1a1/0x2c0 do_symlinkat+0x108/0x1c0 ? __pfx_do_symlinkat+0x10/0x10 ? strncpy_from_user+0xaa/0x160 __x64_sys_symlinkat+0xb9/0xf0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f08d75c13bb Reported-by: David Howells Fixes: e77fe73c7e38 ("cifs: we can not use small padding iovs together with encryption") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2pdu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 83a03201bb86..a86a3fbfb5a4 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3300,6 +3300,15 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, return rc; if (indatalen) { + unsigned int len; + + if (WARN_ON_ONCE(smb3_encryption_required(tcon) && + (check_add_overflow(total_len - 1, + ALIGN(indatalen, 8), &len) || + len > MAX_CIFS_SMALL_BUFFER_SIZE))) { + cifs_small_buf_release(req); + return -EIO; + } /* * indatalen is usually small at a couple of bytes max, so * just allocate through generic pool From 1ded6b12499e6dee9b0e1ceac633be36538f6fc2 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 4 Oct 2024 09:37:38 -0300 Subject: [PATCH 074/207] usb: typec: altmode should keep reference to parent [ Upstream commit befab3a278c59db0cc88c8799638064f6d3fd6f8 ] The altmode device release refers to its parent device, but without keeping a reference to it. When registering the altmode, get a reference to the parent and put it in the release function. Before this fix, when using CONFIG_DEBUG_KOBJECT_RELEASE, we see issues like this: [ 43.572860] kobject: 'port0.0' (ffff8880057ba008): kobject_release, parent 0000000000000000 (delayed 3000) [ 43.573532] kobject: 'port0.1' (ffff8880057bd008): kobject_release, parent 0000000000000000 (delayed 1000) [ 43.574407] kobject: 'port0' (ffff8880057b9008): kobject_release, parent 0000000000000000 (delayed 3000) [ 43.575059] kobject: 'port1.0' (ffff8880057ca008): kobject_release, parent 0000000000000000 (delayed 4000) [ 43.575908] kobject: 'port1.1' (ffff8880057c9008): kobject_release, parent 0000000000000000 (delayed 4000) [ 43.576908] kobject: 'typec' (ffff8880062dbc00): kobject_release, parent 0000000000000000 (delayed 4000) [ 43.577769] kobject: 'port1' (ffff8880057bf008): kobject_release, parent 0000000000000000 (delayed 3000) [ 46.612867] ================================================================== [ 46.613402] BUG: KASAN: slab-use-after-free in typec_altmode_release+0x38/0x129 [ 46.614003] Read of size 8 at addr ffff8880057b9118 by task kworker/2:1/48 [ 46.614538] [ 46.614668] CPU: 2 UID: 0 PID: 48 Comm: kworker/2:1 Not tainted 6.12.0-rc1-00138-gedbae730ad31 #535 [ 46.615391] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 46.616042] Workqueue: events kobject_delayed_cleanup [ 46.616446] Call Trace: [ 46.616648] [ 46.616820] dump_stack_lvl+0x5b/0x7c [ 46.617112] ? typec_altmode_release+0x38/0x129 [ 46.617470] print_report+0x14c/0x49e [ 46.617769] ? rcu_read_unlock_sched+0x56/0x69 [ 46.618117] ? __virt_addr_valid+0x19a/0x1ab [ 46.618456] ? kmem_cache_debug_flags+0xc/0x1d [ 46.618807] ? typec_altmode_release+0x38/0x129 [ 46.619161] kasan_report+0x8d/0xb4 [ 46.619447] ? typec_altmode_release+0x38/0x129 [ 46.619809] ? process_scheduled_works+0x3cb/0x85f [ 46.620185] typec_altmode_release+0x38/0x129 [ 46.620537] ? process_scheduled_works+0x3cb/0x85f [ 46.620907] device_release+0xaf/0xf2 [ 46.621206] kobject_delayed_cleanup+0x13b/0x17a [ 46.621584] process_scheduled_works+0x4f6/0x85f [ 46.621955] ? __pfx_process_scheduled_works+0x10/0x10 [ 46.622353] ? hlock_class+0x31/0x9a [ 46.622647] ? lock_acquired+0x361/0x3c3 [ 46.622956] ? move_linked_works+0x46/0x7d [ 46.623277] worker_thread+0x1ce/0x291 [ 46.623582] ? __kthread_parkme+0xc8/0xdf [ 46.623900] ? __pfx_worker_thread+0x10/0x10 [ 46.624236] kthread+0x17e/0x190 [ 46.624501] ? kthread+0xfb/0x190 [ 46.624756] ? __pfx_kthread+0x10/0x10 [ 46.625015] ret_from_fork+0x20/0x40 [ 46.625268] ? __pfx_kthread+0x10/0x10 [ 46.625532] ret_from_fork_asm+0x1a/0x30 [ 46.625805] [ 46.625953] [ 46.626056] Allocated by task 678: [ 46.626287] kasan_save_stack+0x24/0x44 [ 46.626555] kasan_save_track+0x14/0x2d [ 46.626811] __kasan_kmalloc+0x3f/0x4d [ 46.627049] __kmalloc_noprof+0x1bf/0x1f0 [ 46.627362] typec_register_port+0x23/0x491 [ 46.627698] cros_typec_probe+0x634/0xbb6 [ 46.628026] platform_probe+0x47/0x8c [ 46.628311] really_probe+0x20a/0x47d [ 46.628605] device_driver_attach+0x39/0x72 [ 46.628940] bind_store+0x87/0xd7 [ 46.629213] kernfs_fop_write_iter+0x1aa/0x218 [ 46.629574] vfs_write+0x1d6/0x29b [ 46.629856] ksys_write+0xcd/0x13b [ 46.630128] do_syscall_64+0xd4/0x139 [ 46.630420] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 46.630820] [ 46.630946] Freed by task 48: [ 46.631182] kasan_save_stack+0x24/0x44 [ 46.631493] kasan_save_track+0x14/0x2d [ 46.631799] kasan_save_free_info+0x3f/0x4d [ 46.632144] __kasan_slab_free+0x37/0x45 [ 46.632474] kfree+0x1d4/0x252 [ 46.632725] device_release+0xaf/0xf2 [ 46.633017] kobject_delayed_cleanup+0x13b/0x17a [ 46.633388] process_scheduled_works+0x4f6/0x85f [ 46.633764] worker_thread+0x1ce/0x291 [ 46.634065] kthread+0x17e/0x190 [ 46.634324] ret_from_fork+0x20/0x40 [ 46.634621] ret_from_fork_asm+0x1a/0x30 Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241004123738.2964524-1-cascardo@igalia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/class.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index f92fc2acfcba..79cad8d61dac 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -502,6 +502,7 @@ static void typec_altmode_release(struct device *dev) typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); + put_device(alt->adev.dev.parent); kfree(alt); } @@ -551,6 +552,8 @@ typec_register_altmode(struct device *parent, alt->adev.dev.type = &typec_altmode_dev_type; dev_set_name(&alt->adev.dev, "%s.%u", dev_name(parent), id); + get_device(alt->adev.dev.parent); + /* Link partners and plugs with the ports */ if (!is_port) typec_altmode_set_partner(alt); From 9221ae88c20511d08817b2956a8b7a5611db931f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Oct 2024 17:52:39 +0200 Subject: [PATCH 075/207] s390: Initialize psw mask in perf_arch_fetch_caller_regs() [ Upstream commit 223e7fb979fa06934f1595b6ad0ae1d4ead1147f ] Also initialize regs->psw.mask in perf_arch_fetch_caller_regs(). This way user_mode(regs) will return false, like it should. It looks like all current users initialize regs to zero, so that this doesn't fix a bug currently. However it is better to not rely on callers to do this. Fixes: 914d52e46490 ("s390: implement perf_arch_fetch_caller_regs") Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/include/asm/perf_event.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 9917e2717b2b..66aff768f815 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -73,6 +73,7 @@ struct perf_sf_sde_regs { #define SAMPLE_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE) #define perf_arch_fetch_caller_regs(regs, __ip) do { \ + (regs)->psw.mask = 0; \ (regs)->psw.addr = (__ip); \ (regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \ offsetof(struct stack_frame, back_chain); \ From 97bbdc5001db35809d9d4dcf359e5c8af6e6ed55 Mon Sep 17 00:00:00 2001 From: Tyrone Wu Date: Fri, 11 Oct 2024 19:32:51 +0000 Subject: [PATCH 076/207] bpf: Fix link info netfilter flags to populate defrag flag [ Upstream commit 92f3715e1eba1d41e55be06159dc3d856b18326d ] This fix correctly populates the `bpf_link_info.netfilter.flags` field when user passes the `BPF_F_NETFILTER_IP_DEFRAG` flag. Fixes: 91721c2d02d3 ("netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link") Signed-off-by: Tyrone Wu Signed-off-by: Daniel Borkmann Acked-by: Florian Westphal Cc: Daniel Xu Link: https://lore.kernel.org/bpf/20241011193252.178997-1-wudevelops@gmail.com Signed-off-by: Sasha Levin --- net/netfilter/nf_bpf_link.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 0e4beae421f8..96015a59db09 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -150,11 +150,12 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + const struct nf_defrag_hook *hook = nf_link->defrag_hook; info->netfilter.pf = nf_link->hook_ops.pf; info->netfilter.hooknum = nf_link->hook_ops.hooknum; info->netfilter.priority = nf_link->hook_ops.priority; - info->netfilter.flags = 0; + info->netfilter.flags = hook ? BPF_F_NETFILTER_IP_DEFRAG : 0; return 0; } From d10cd7bf574ead01fae140ce117a11bcdacbe6a8 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 14 Oct 2024 17:07:08 +0800 Subject: [PATCH 077/207] Bluetooth: bnep: fix wild-memory-access in proto_unregister [ Upstream commit 64a90991ba8d4e32e3173ddd83d0b24167a5668c ] There's issue as follows: KASAN: maybe wild-memory-access in range [0xdead...108-0xdead...10f] CPU: 3 UID: 0 PID: 2805 Comm: rmmod Tainted: G W RIP: 0010:proto_unregister+0xee/0x400 Call Trace: __do_sys_delete_module+0x318/0x580 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f As bnep_init() ignore bnep_sock_init()'s return value, and bnep_sock_init() will cleanup all resource. Then when remove bnep module will call bnep_sock_cleanup() to cleanup sock's resource. To solve above issue just return bnep_sock_init()'s return value in bnep_exit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ye Bin Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/bnep/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a660c428e220..38f542665f19 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -745,8 +745,7 @@ static int __init bnep_init(void) if (flt[0]) BT_INFO("BNEP filters: %s", flt); - bnep_sock_init(); - return 0; + return bnep_sock_init(); } static void __exit bnep_exit(void) From 59ba6cdadb9c26b606a365eb9c9b25eb2052622d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 14 Oct 2024 21:03:11 +0200 Subject: [PATCH 078/207] vmxnet3: Fix packet corruption in vmxnet3_xdp_xmit_frame [ Upstream commit 4678adf94da4a9e9683817b246b58ce15fb81782 ] Andrew and Nikolay reported connectivity issues with Cilium's service load-balancing in case of vmxnet3. If a BPF program for native XDP adds an encapsulation header such as IPIP and transmits the packet out the same interface, then in case of vmxnet3 a corrupted packet is being sent and subsequently dropped on the path. vmxnet3_xdp_xmit_frame() which is called e.g. via vmxnet3_run_xdp() through vmxnet3_xdp_xmit_back() calculates an incorrect DMA address: page = virt_to_page(xdpf->data); tbi->dma_addr = page_pool_get_dma_addr(page) + VMXNET3_XDP_HEADROOM; dma_sync_single_for_device(&adapter->pdev->dev, tbi->dma_addr, buf_size, DMA_TO_DEVICE); The above assumes a fixed offset (VMXNET3_XDP_HEADROOM), but the XDP BPF program could have moved xdp->data. While the passed buf_size is correct (xdpf->len), the dma_addr needs to have a dynamic offset which can be calculated as xdpf->data - (void *)xdpf, that is, xdp->data - xdp->data_hard_start. Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.") Reported-by: Andrew Sauber Reported-by: Nikolay Nikolaev Signed-off-by: Daniel Borkmann Tested-by: Nikolay Nikolaev Acked-by: Anton Protopopov Cc: William Tu Cc: Ronak Doshi Link: https://patch.msgid.link/a0888656d7f09028f9984498cc698bb5364d89fc.1728931137.git.daniel@iogearbox.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index a6c787454a1a..1341374a4588 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -148,7 +148,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, } else { /* XDP buffer from page pool */ page = virt_to_page(xdpf->data); tbi->dma_addr = page_pool_get_dma_addr(page) + - VMXNET3_XDP_HEADROOM; + (xdpf->data - (void *)xdpf); dma_sync_single_for_device(&adapter->pdev->dev, tbi->dma_addr, buf_size, DMA_TO_DEVICE); From 5ec33b1fe86d0d4686024632d3e489d3171fe014 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 15 Oct 2024 12:32:05 +0300 Subject: [PATCH 079/207] net/mlx5: Check for invalid vector index on EQ creation [ Upstream commit d4f25be27e3ef7e23998fbd3dd4bff0602de7ae5 ] Currently, mlx5 driver does not enforce vector index to be lower than the maximum number of supported completion vectors when requesting a new completion EQ. Thus, mlx5_comp_eqn_get() fails when trying to acquire an IRQ with an improper vector index. To prevent the case above, enforce that vector index value is valid and lower than maximum in mlx5_comp_eqn_get() before handling the request. Fixes: f14c1a14e632 ("net/mlx5: Allocate completion EQs dynamically") Signed-off-by: Maher Sanalla Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 40a6cb052a2d..07a041954909 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1073,6 +1073,12 @@ int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn) struct mlx5_eq_comp *eq; int ret = 0; + if (vecidx >= table->max_comp_eqs) { + mlx5_core_dbg(dev, "Requested vector index %u should be less than %u", + vecidx, table->max_comp_eqs); + return -EINVAL; + } + mutex_lock(&table->comp_lock); eq = xa_load(&table->comp_eqs, vecidx); if (eq) { From d88564c79d1cedaf2655f12261eca0d2796bde4e Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 15 Oct 2024 12:32:06 +0300 Subject: [PATCH 080/207] net/mlx5: Fix command bitmask initialization [ Upstream commit d62b14045c6511a7b2d4948d1a83a4e592deeb05 ] Command bitmask have a dedicated bit for MANAGE_PAGES command, this bit isn't Initialize during command bitmask Initialization, only during MANAGE_PAGES. In addition, mlx5_cmd_trigger_completions() is trying to trigger completion for MANAGE_PAGES command as well. Hence, in case health error occurred before any MANAGE_PAGES command have been invoke (for example, during mlx5_enable_hca()), mlx5_cmd_trigger_completions() will try to trigger completion for MANAGE_PAGES command, which will result in null-ptr-deref error.[1] Fix it by Initialize command bitmask correctly. While at it, re-write the code for better understanding. [1] BUG: KASAN: null-ptr-deref in mlx5_cmd_trigger_completions+0x1db/0x600 [mlx5_core] Write of size 4 at addr 0000000000000214 by task kworker/u96:2/12078 CPU: 10 PID: 12078 Comm: kworker/u96:2 Not tainted 6.9.0-rc2_for_upstream_debug_2024_04_07_19_01 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_health0000:08:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] Call Trace: dump_stack_lvl+0x7e/0xc0 kasan_report+0xb9/0xf0 kasan_check_range+0xec/0x190 mlx5_cmd_trigger_completions+0x1db/0x600 [mlx5_core] mlx5_cmd_flush+0x94/0x240 [mlx5_core] enter_error_state+0x6c/0xd0 [mlx5_core] mlx5_fw_fatal_reporter_err_work+0xf3/0x480 [mlx5_core] process_one_work+0x787/0x1490 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0xda0/0xda0 ? assign_work+0x168/0x240 worker_thread+0x586/0xd30 ? rescuer_thread+0xae0/0xae0 kthread+0x2df/0x3b0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x2d/0x70 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 Fixes: 9b98d395b85d ("net/mlx5: Start health poll at earlier stage of driver load") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 48dc4ae87af0..80af0fc7101f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1758,6 +1758,10 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force } } +#define MLX5_MAX_MANAGE_PAGES_CMD_ENT 1 +#define MLX5_CMD_MASK ((1UL << (cmd->vars.max_reg_cmds + \ + MLX5_MAX_MANAGE_PAGES_CMD_ENT)) - 1) + static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; @@ -1769,7 +1773,7 @@ static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) /* wait for pending handlers to complete */ mlx5_eq_synchronize_cmd_irq(dev); spin_lock_irqsave(&dev->cmd.alloc_lock, flags); - vector = ~dev->cmd.vars.bitmask & ((1ul << (1 << dev->cmd.vars.log_sz)) - 1); + vector = ~dev->cmd.vars.bitmask & MLX5_CMD_MASK; if (!vector) goto no_trig; @@ -2275,7 +2279,7 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) cmd->state = MLX5_CMDIF_STATE_DOWN; cmd->vars.max_reg_cmds = (1 << cmd->vars.log_sz) - 1; - cmd->vars.bitmask = (1UL << cmd->vars.max_reg_cmds) - 1; + cmd->vars.bitmask = MLX5_CMD_MASK; sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds); sema_init(&cmd->vars.pages_sem, 1); From 9f2ccb6f3888bec45c00121ee43e4e72423b12c1 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 15 Oct 2024 12:32:07 +0300 Subject: [PATCH 081/207] net/mlx5: Unregister notifier on eswitch init failure [ Upstream commit 1da9cfd6c41c2e6bbe624d0568644e1521c33e12 ] It otherwise remains registered and a subsequent attempt at eswitch enabling might trigger warnings of the sort: [ 682.589148] ------------[ cut here ]------------ [ 682.590204] notifier callback eswitch_vport_event [mlx5_core] already registered [ 682.590256] WARNING: CPU: 13 PID: 2660 at kernel/notifier.c:31 notifier_chain_register+0x3e/0x90 [...snipped] [ 682.610052] Call Trace: [ 682.610369] [ 682.610663] ? __warn+0x7c/0x110 [ 682.611050] ? notifier_chain_register+0x3e/0x90 [ 682.611556] ? report_bug+0x148/0x170 [ 682.611977] ? handle_bug+0x36/0x70 [ 682.612384] ? exc_invalid_op+0x13/0x60 [ 682.612817] ? asm_exc_invalid_op+0x16/0x20 [ 682.613284] ? notifier_chain_register+0x3e/0x90 [ 682.613789] atomic_notifier_chain_register+0x25/0x40 [ 682.614322] mlx5_eswitch_enable_locked+0x1d4/0x3b0 [mlx5_core] [ 682.614965] mlx5_eswitch_enable+0xc9/0x100 [mlx5_core] [ 682.615551] mlx5_device_enable_sriov+0x25/0x340 [mlx5_core] [ 682.616170] mlx5_core_sriov_configure+0x50/0x170 [mlx5_core] [ 682.616789] sriov_numvfs_store+0xb0/0x1b0 [ 682.617248] kernfs_fop_write_iter+0x117/0x1a0 [ 682.617734] vfs_write+0x231/0x3f0 [ 682.618138] ksys_write+0x63/0xe0 [ 682.618536] do_syscall_64+0x4c/0x100 [ 682.618958] entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: 7624e58a8b3a ("net/mlx5: E-switch, register event handler before arming the event") Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1789800faaeb..f6022c135ec0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1489,7 +1489,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) } if (err) - goto abort; + goto err_esw_enable; esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; @@ -1503,7 +1503,8 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) return 0; -abort: +err_esw_enable: + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); mlx5_esw_acls_ns_cleanup(esw); return err; } From 12c3e619d34aea5135367606b04c557171a0bf04 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Sun, 13 Oct 2024 18:26:39 +0200 Subject: [PATCH 082/207] bpf, sockmap: SK_DROP on attempted redirects of unsupported af_vsock [ Upstream commit 9c5bd93edf7b8834aecaa7c340b852d5990d7c78 ] Don't mislead the callers of bpf_{sk,msg}_redirect_{map,hash}(): make sure to immediately and visibly fail the forwarding of unsupported af_vsock packets. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241013-vsock-fixes-for-redir-v2-1-d6577bbfe742@rbox.co Signed-off-by: Sasha Levin --- include/net/sock.h | 5 +++++ net/core/sock_map.c | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index c3961050b8e3..e0be8bd98396 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2826,6 +2826,11 @@ static inline bool sk_is_stream_unix(const struct sock *sk) return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; } +static inline bool sk_is_vsock(const struct sock *sk) +{ + return sk->sk_family == AF_VSOCK; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2afac40bb83c..2da881a8e798 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -644,6 +644,8 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, sk = __sock_map_lookup_elem(map, key); if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + if ((flags & BPF_F_INGRESS) && sk_is_vsock(sk)) + return SK_DROP; skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS); return SK_PASS; @@ -672,6 +674,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg, return SK_DROP; if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk)) return SK_DROP; + if (sk_is_vsock(sk)) + return SK_DROP; msg->flags = flags; msg->sk_redir = sk; @@ -1246,6 +1250,8 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb, sk = __sock_hash_lookup_elem(map, key); if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + if ((flags & BPF_F_INGRESS) && sk_is_vsock(sk)) + return SK_DROP; skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS); return SK_PASS; @@ -1274,6 +1280,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg, return SK_DROP; if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk)) return SK_DROP; + if (sk_is_vsock(sk)) + return SK_DROP; msg->flags = flags; msg->sk_redir = sk; From 66cd51de31c682a311c2fa25c580b7ea45859dd9 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Sun, 13 Oct 2024 18:26:40 +0200 Subject: [PATCH 083/207] vsock: Update rx_bytes on read_skb() [ Upstream commit 3543152f2d330141d9394d28855cb90b860091d2 ] Make sure virtio_transport_inc_rx_pkt() and virtio_transport_dec_rx_pkt() calls are balanced (i.e. virtio_vsock_sock::rx_bytes doesn't lie) after vsock_transport::read_skb(). While here, also inform the peer that we've freed up space and it has more credit. Failing to update rx_bytes after packet is dequeued leads to a warning on SOCK_STREAM recv(): [ 233.396654] rx_queue is empty, but rx_bytes is non-zero [ 233.396702] WARNING: CPU: 11 PID: 40601 at net/vmw_vsock/virtio_transport_common.c:589 Fixes: 634f1a7110b4 ("vsock: support sockmap") Suggested-by: Stefano Garzarella Signed-off-by: Michal Luczaj Signed-off-by: Daniel Borkmann Reviewed-by: Stefano Garzarella Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241013-vsock-fixes-for-redir-v2-2-d6577bbfe742@rbox.co Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport_common.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e87fd9480acd..072878012b51 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1508,6 +1508,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto { struct virtio_vsock_sock *vvs = vsk->trans; struct sock *sk = sk_vsock(vsk); + struct virtio_vsock_hdr *hdr; struct sk_buff *skb; int off = 0; int err; @@ -1517,10 +1518,16 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto * works for types other than dgrams. */ skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err); + if (!skb) { + spin_unlock_bh(&vvs->rx_lock); + return err; + } + + hdr = virtio_vsock_hdr(skb); + virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len)); spin_unlock_bh(&vvs->rx_lock); - if (!skb) - return err; + virtio_transport_send_credit_update(vsk); return recv_actor(sk, skb); } From 9dd9b16018ccba964d111d775b0a7964a691eb54 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Sun, 13 Oct 2024 18:26:41 +0200 Subject: [PATCH 084/207] vsock: Update msg_count on read_skb() [ Upstream commit 6dafde852df8de3617d4b9f835b629aaeaccd01d ] Dequeuing via vsock_transport::read_skb() left msg_count outdated, which then confused SOCK_SEQPACKET recv(). Decrease the counter. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Signed-off-by: Daniel Borkmann Reviewed-by: Stefano Garzarella Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241013-vsock-fixes-for-redir-v2-3-d6577bbfe742@rbox.co Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 072878012b51..78b5f4f8808b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1524,6 +1524,9 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto } hdr = virtio_vsock_hdr(skb); + if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) + vvs->msg_count--; + virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len)); spin_unlock_bh(&vvs->rx_lock); From 8a377818bcf87665dcb7a701a745491889fd9ae2 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Sun, 13 Oct 2024 18:26:42 +0200 Subject: [PATCH 085/207] bpf, vsock: Drop static vsock_bpf_prot initialization [ Upstream commit 19039f279797efbe044cae41ee216c5fe481fc33 ] vsock_bpf_prot is set up at runtime. Remove the superfluous init. No functional change intended. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241013-vsock-fixes-for-redir-v2-4-d6577bbfe742@rbox.co Signed-off-by: Sasha Levin --- net/vmw_vsock/vsock_bpf.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index c42c5cc18f32..4aa6e74ec295 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -114,14 +114,6 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, return copied; } -/* Copy of original proto with updated sock_map methods */ -static struct proto vsock_bpf_prot = { - .close = sock_map_close, - .recvmsg = vsock_bpf_recvmsg, - .sock_is_readable = sk_msg_is_readable, - .unhash = sock_map_unhash, -}; - static void vsock_bpf_rebuild_protos(struct proto *prot, const struct proto *base) { *prot = *base; From ed67a5b99e4dbf6d10cd7d823753e72dcce29938 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 17 Oct 2024 17:36:28 +0300 Subject: [PATCH 086/207] riscv, bpf: Make BPF_CMPXCHG fully ordered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e59db0623f6955986d1be0880b351a1f56e7fd6d ] According to the prototype formal BPF memory consistency model discussed e.g. in [1] and following the ordering properties of the C/in-kernel macro atomic_cmpxchg(), a BPF atomic operation with the BPF_CMPXCHG modifier is fully ordered. However, the current RISC-V JIT lowerings fail to meet such memory ordering property. This is illustrated by the following litmus test: BPF BPF__MP+success_cmpxchg+fence { 0:r1=x; 0:r3=y; 0:r5=1; 1:r2=y; 1:r4=f; 1:r7=x; } P0 | P1 ; *(u64 *)(r1 + 0) = 1 | r1 = *(u64 *)(r2 + 0) ; r2 = cmpxchg_64 (r3 + 0, r4, r5) | r3 = atomic_fetch_add((u64 *)(r4 + 0), r5) ; | r6 = *(u64 *)(r7 + 0) ; exists (1:r1=1 /\ 1:r6=0) whose "exists" clause is not satisfiable according to the BPF memory model. Using the current RISC-V JIT lowerings, the test can be mapped to the following RISC-V litmus test: RISCV RISCV__MP+success_cmpxchg+fence { 0:x1=x; 0:x3=y; 0:x5=1; 1:x2=y; 1:x4=f; 1:x7=x; } P0 | P1 ; sd x5, 0(x1) | ld x1, 0(x2) ; L00: | amoadd.d.aqrl x3, x5, 0(x4) ; lr.d x2, 0(x3) | ld x6, 0(x7) ; bne x2, x4, L01 | ; sc.d x6, x5, 0(x3) | ; bne x6, x4, L00 | ; fence rw, rw | ; L01: | ; exists (1:x1=1 /\ 1:x6=0) where the two stores in P0 can be reordered. Update the RISC-V JIT lowerings/implementation of BPF_CMPXCHG to emit an SC with RELEASE ("rl") annotation in order to meet the expected memory ordering guarantees. The resulting RISC-V JIT lowerings of BPF_CMPXCHG match the RISC-V lowerings of the C atomic_cmpxchg(). Other lowerings were fixed via 20a759df3bba ("riscv, bpf: make some atomic operations fully ordered"). Fixes: dd642ccb45ec ("riscv, bpf: Implement more atomic operations for RV64") Signed-off-by: Andrea Parri Signed-off-by: Daniel Borkmann Reviewed-by: Puranjay Mohan Acked-by: Björn Töpel Link: https://lpc.events/event/18/contributions/1949/attachments/1665/3441/bpfmemmodel.2024.09.19p.pdf [1] Link: https://lore.kernel.org/bpf/20241017143628.2673894-1-parri.andrea@gmail.com Signed-off-by: Sasha Levin --- arch/riscv/net/bpf_jit_comp64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 2f041b5cea97..26eeb3973631 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -555,8 +555,8 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, rv_lr_w(r0, 0, rd, 0, 0), ctx); jmp_offset = ninsns_rvoff(8); emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); - emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) : - rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx); + emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) : + rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx); jmp_offset = ninsns_rvoff(-6); emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); emit(rv_fence(0x3, 0x3), ctx); From e2c03a2c838e13c8d7342b455f53bb432b4d4c8c Mon Sep 17 00:00:00 2001 From: William Butler Date: Wed, 10 Jan 2024 18:28:55 +0000 Subject: [PATCH 087/207] nvme-pci: set doorbell config before unquiescing [ Upstream commit 06c59d427017fcde3107c236177fcc74c9db7909 ] During resets, if queues are unquiesced first, then the host can submit IOs to the controller using shadow doorbell logic but the controller won't be aware. This can lead to necessary MMIO doorbells from being not issued, causing requests to be delayed and timed-out. Signed-off-by: William Butler Signed-off-by: Keith Busch Stable-dep-of: 26bc0a81f64c ("nvme-pci: fix race condition between reset and nvme_dev_disable()") Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 32b5cc76a022..61c9b175e035 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2757,10 +2757,10 @@ static void nvme_reset_work(struct work_struct *work) * controller around but remove all namespaces. */ if (dev->online_queues > 1) { + nvme_dbbuf_set(dev); nvme_unquiesce_io_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); nvme_unfreeze(&dev->ctrl); } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); From 4ed32cc0939b64e3d7b48c8c0d63ea038775f304 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 15 Oct 2024 13:21:00 +0200 Subject: [PATCH 088/207] nvme-pci: fix race condition between reset and nvme_dev_disable() [ Upstream commit 26bc0a81f64ce00fc4342c38eeb2eddaad084dd2 ] nvme_dev_disable() modifies the dev->online_queues field, therefore nvme_pci_update_nr_queues() should avoid racing against it, otherwise we could end up passing invalid values to blk_mq_update_nr_hw_queues(). WARNING: CPU: 39 PID: 61303 at drivers/pci/msi/api.c:347 pci_irq_get_affinity+0x187/0x210 Workqueue: nvme-reset-wq nvme_reset_work [nvme] RIP: 0010:pci_irq_get_affinity+0x187/0x210 Call Trace: ? blk_mq_pci_map_queues+0x87/0x3c0 ? pci_irq_get_affinity+0x187/0x210 blk_mq_pci_map_queues+0x87/0x3c0 nvme_pci_map_queues+0x189/0x460 [nvme] blk_mq_update_nr_hw_queues+0x2a/0x40 nvme_reset_work+0x1be/0x2a0 [nvme] Fix the bug by locking the shutdown_lock mutex before using dev->online_queues. Give up if nvme_dev_disable() is running or if it has been executed already. Fixes: 949928c1c731 ("NVMe: Fix possible queue use after freed") Tested-by: Yi Zhang Reviewed-by: Christoph Hellwig Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 61c9b175e035..b701969cf1c2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2469,17 +2469,29 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev) return 1; } -static void nvme_pci_update_nr_queues(struct nvme_dev *dev) +static bool nvme_pci_update_nr_queues(struct nvme_dev *dev) { if (!dev->ctrl.tagset) { nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops, nvme_pci_nr_maps(dev), sizeof(struct nvme_iod)); - return; + return true; + } + + /* Give up if we are racing with nvme_dev_disable() */ + if (!mutex_trylock(&dev->shutdown_lock)) + return false; + + /* Check if nvme_dev_disable() has been executed already */ + if (!dev->online_queues) { + mutex_unlock(&dev->shutdown_lock); + return false; } blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); /* free previously allocated queues that are no longer usable */ nvme_free_queues(dev, dev->online_queues); + mutex_unlock(&dev->shutdown_lock); + return true; } static int nvme_pci_enable(struct nvme_dev *dev) @@ -2760,7 +2772,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_dbbuf_set(dev); nvme_unquiesce_io_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); - nvme_pci_update_nr_queues(dev); + if (!nvme_pci_update_nr_queues(dev)) + goto out; nvme_unfreeze(&dev->ctrl); } else { dev_warn(dev->ctrl.device, "IO queues lost\n"); From bb0f943675a9cf1d6b13bd94a13b63a7063a6d36 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 16 Oct 2024 14:00:47 -0700 Subject: [PATCH 089/207] bpf: Fix iter/task tid filtering [ Upstream commit 9495a5b731fcaf580448a3438d63601c88367661 ] In userspace, you can add a tid filter by setting the "task.tid" field for "bpf_iter_link_info". However, `get_pid_task` when called for the `BPF_TASK_ITER_TID` type should have been using `PIDTYPE_PID` (tid) instead of `PIDTYPE_TGID` (pid). Fixes: f0d74c4da1f0 ("bpf: Parameterize task iterators.") Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241016210048.1213935-1-linux@jordanrome.com Signed-off-by: Sasha Levin --- kernel/bpf/task_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c4ab9d6cdbe9..f7ef58090c7d 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -119,7 +119,7 @@ static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *co rcu_read_lock(); pid = find_pid_ns(common->pid, common->ns); if (pid) { - task = get_pid_task(pid, PIDTYPE_TGID); + task = get_pid_task(pid, PIDTYPE_PID); *tid = common->pid; } rcu_read_unlock(); From cf2bb1bf03068504f5c216194df57c23ed0bed5a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Oct 2024 15:09:02 -0700 Subject: [PATCH 090/207] cdrom: Avoid barrier_nospec() in cdrom_ioctl_media_changed() [ Upstream commit b0bf1afde7c34698cf61422fa8ee60e690dc25c3 ] The barrier_nospec() after the array bounds check is overkill and painfully slow for arches which implement it. Furthermore, most arches don't implement it, so they remain exposed to Spectre v1 (which can affect pretty much any CPU with branch prediction). Instead, clamp the user pointer to a valid range so it's guaranteed to be a valid array index even when the bounds check mispredicts. Fixes: 8270cb10c068 ("cdrom: Fix spectre-v1 gadget") Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/1d86f4d9d8fba68e5ca64cdeac2451b95a8bf872.1729202937.git.jpoimboe@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 01f46caf1f88..54b80911f3e2 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2313,7 +2313,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, return -EINVAL; /* Prevent arg from speculatively bypassing the length check */ - barrier_nospec(); + arg = array_index_nospec(arg, cdi->capacity); info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) From a1afee6c6f5366949761afd0d52e3d8142a42e2c Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Fri, 20 Oct 2023 11:33:30 -0700 Subject: [PATCH 091/207] mm/khugepaged: convert alloc_charge_hpage() to use folios [ Upstream commit b455f39d228935f88eebcd1f7c1a6981093c6a3b ] Also remove count_memcg_page_event now that its last caller no longer uses it and reword hpage_collapse_alloc_page() to hpage_collapse_alloc_folio(). This removes 1 call to compound_head() and helps convert khugepaged to use folios throughout. Link: https://lkml.kernel.org/r/20231020183331.10770-5-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Rik van Riel Reviewed-by: Yang Shi Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 37f0b47c5143 ("mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point") Signed-off-by: Sasha Levin --- include/linux/memcontrol.h | 14 -------------- mm/khugepaged.c | 17 ++++++++++------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e24da16d2c..b1fdb1554f2f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1080,15 +1080,6 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, local_irq_restore(flags); } -static inline void count_memcg_page_event(struct page *page, - enum vm_event_item idx) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - count_memcg_events(memcg, idx, 1); -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1565,11 +1556,6 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, { } -static inline void count_memcg_page_event(struct page *page, - int idx) -{ -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 88433cc25d8a..97cc4ef06183 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -887,16 +887,16 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, +static bool hpage_collapse_alloc_folio(struct folio **folio, gfp_t gfp, int node, nodemask_t *nmask) { - *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask); - if (unlikely(!*hpage)) { + *folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, nmask); + + if (unlikely(!*folio)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return false; } - folio_prep_large_rmappable((struct folio *)*hpage); count_vm_event(THP_COLLAPSE_ALLOC); return true; } @@ -1063,17 +1063,20 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, int node = hpage_collapse_find_target_node(cc); struct folio *folio; - if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask)) + if (!hpage_collapse_alloc_folio(&folio, gfp, node, &cc->alloc_nmask)) { + *hpage = NULL; return SCAN_ALLOC_HUGE_PAGE_FAIL; + } - folio = page_folio(*hpage); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); *hpage = NULL; return SCAN_CGROUP_CHARGE_FAIL; } - count_memcg_page_event(*hpage, THP_COLLAPSE_ALLOC); + count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1); + + *hpage = folio_page(folio, 0); return SCAN_SUCCEED; } From 4ba70817cfadfbf8cb29c6f8fa0af19343c482d5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 11 Dec 2023 16:22:13 +0000 Subject: [PATCH 092/207] mm: convert collapse_huge_page() to use a folio [ Upstream commit 5432726848bb27a01badcbc93b596f39ee6c5ffb ] Replace three calls to compound_head() with one. Link: https://lkml.kernel.org/r/20231211162214.2146080-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Stable-dep-of: 37f0b47c5143 ("mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point") Signed-off-by: Sasha Levin --- mm/khugepaged.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 97cc4ef06183..24d05e0a672d 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1088,6 +1088,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, pmd_t *pmd, _pmd; pte_t *pte; pgtable_t pgtable; + struct folio *folio; struct page *hpage; spinlock_t *pmd_ptl, *pte_ptl; int result = SCAN_FAIL; @@ -1207,13 +1208,13 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (unlikely(result != SCAN_SUCCEED)) goto out_up_write; + folio = page_folio(hpage); /* - * spin_lock() below is not the equivalent of smp_wmb(), but - * the smp_wmb() inside __SetPageUptodate() can be reused to - * avoid the copy_huge_page writes to become visible after - * the set_pmd_at() write. + * The smp_wmb() inside __folio_mark_uptodate() ensures the + * copy_huge_page writes become visible before the set_pmd_at() + * write. */ - __SetPageUptodate(hpage); + __folio_mark_uptodate(folio); pgtable = pmd_pgtable(_pmd); _pmd = mk_huge_pmd(hpage, vma->vm_page_prot); @@ -1221,8 +1222,8 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); - page_add_new_anon_rmap(hpage, vma, address); - lru_cache_add_inactive_or_unevictable(hpage, vma); + folio_add_new_anon_rmap(folio, vma, address); + folio_add_lru_vma(folio, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); From c556c55806466e7c8ad1085c56a6460bf8fea197 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Dec 2023 08:57:47 +0000 Subject: [PATCH 093/207] mm/khugepaged: use a folio more in collapse_file() [ Upstream commit b54d60b18e850561e8bdb4264ae740676c3b7658 ] This function is not yet fully converted to the folio API, but this removes a few uses of old APIs. Link: https://lkml.kernel.org/r/20231228085748.1083901-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Vlastimil Babka Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Johannes Weiner Signed-off-by: Andrew Morton Stable-dep-of: 37f0b47c5143 ("mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point") Signed-off-by: Sasha Levin --- mm/khugepaged.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 24d05e0a672d..cb6a24368804 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2124,23 +2124,23 @@ immap_locked: xas_lock_irq(&xas); } - nr = thp_nr_pages(hpage); + folio = page_folio(hpage); + nr = folio_nr_pages(folio); if (is_shmem) - __mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr); + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); else - __mod_lruvec_page_state(hpage, NR_FILE_THPS, nr); + __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); if (nr_none) { - __mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none); + __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_none); /* nr_none is always 0 for non-shmem. */ - __mod_lruvec_page_state(hpage, NR_SHMEM, nr_none); + __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_none); } /* * Mark hpage as uptodate before inserting it into the page cache so * that it isn't mistaken for an fallocated but unwritten page. */ - folio = page_folio(hpage); folio_mark_uptodate(folio); folio_ref_add(folio, HPAGE_PMD_NR - 1); @@ -2150,7 +2150,7 @@ immap_locked: /* Join all the small entries into a single multi-index entry. */ xas_set_order(&xas, start, HPAGE_PMD_ORDER); - xas_store(&xas, hpage); + xas_store(&xas, folio); WARN_ON_ONCE(xas_error(&xas)); xas_unlock_irq(&xas); @@ -2161,7 +2161,7 @@ immap_locked: retract_page_tables(mapping, start); if (cc && !cc->is_khugepaged) result = SCAN_PTE_MAPPED_HUGEPAGE; - unlock_page(hpage); + folio_unlock(folio); /* * The collapse has succeeded, so free the old pages. From 281a0312ce4199fad0c808b97b7d3a5416417c67 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:18:30 +0100 Subject: [PATCH 094/207] khugepaged: inline hpage_collapse_alloc_folio() [ Upstream commit 4746f5ce0fa52e21b5fe432970fe9516d1a45ebc ] Patch series "khugepaged folio conversions". We've been kind of hacking piecemeal at converting khugepaged to use folios instead of compound pages, and so this patchset is a little larger than it should be as I undo some of our wrong moves in the past. In particular, collapse_file() now consistently uses 'new_folio' for the freshly allocated folio and 'folio' for the one that's currently in use. This patch (of 7): This function has one caller, and the combined function is simpler to read, reason about and modify. Link: https://lkml.kernel.org/r/20240403171838.1445826-1-willy@infradead.org Link: https://lkml.kernel.org/r/20240403171838.1445826-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Vishal Moola (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 37f0b47c5143 ("mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point") Signed-off-by: Sasha Levin --- mm/khugepaged.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index cb6a24368804..d0fcfa47085b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -887,20 +887,6 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_folio(struct folio **folio, gfp_t gfp, int node, - nodemask_t *nmask) -{ - *folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, nmask); - - if (unlikely(!*folio)) { - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); - return false; - } - - count_vm_event(THP_COLLAPSE_ALLOC); - return true; -} - /* * If mmap_lock temporarily dropped, revalidate vma * before taking mmap_lock. @@ -1063,11 +1049,14 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, int node = hpage_collapse_find_target_node(cc); struct folio *folio; - if (!hpage_collapse_alloc_folio(&folio, gfp, node, &cc->alloc_nmask)) { + folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask); + if (!folio) { *hpage = NULL; + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return SCAN_ALLOC_HUGE_PAGE_FAIL; } + count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); *hpage = NULL; From 03e36dba005e68ae60abbfe62e9b3fdc2e191796 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:18:31 +0100 Subject: [PATCH 095/207] khugepaged: convert alloc_charge_hpage to alloc_charge_folio [ Upstream commit d5ab50b9412c0bba750eef5a34fd2937de1aee55 ] Both callers want to deal with a folio, so return a folio from this function. Link: https://lkml.kernel.org/r/20240403171838.1445826-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 37f0b47c5143 ("mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point") Signed-off-by: Sasha Levin --- mm/khugepaged.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index d0fcfa47085b..b197323450b5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1041,7 +1041,7 @@ out: return result; } -static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, +static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm, struct collapse_control *cc) { gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() : @@ -1051,7 +1051,7 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask); if (!folio) { - *hpage = NULL; + *foliop = NULL; count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return SCAN_ALLOC_HUGE_PAGE_FAIL; } @@ -1059,13 +1059,13 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); - *hpage = NULL; + *foliop = NULL; return SCAN_CGROUP_CHARGE_FAIL; } count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1); - *hpage = folio_page(folio, 0); + *foliop = folio; return SCAN_SUCCEED; } @@ -1094,7 +1094,8 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, */ mmap_read_unlock(mm); - result = alloc_charge_hpage(&hpage, mm, cc); + result = alloc_charge_folio(&folio, mm, cc); + hpage = &folio->page; if (result != SCAN_SUCCEED) goto out_nolock; @@ -1197,7 +1198,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (unlikely(result != SCAN_SUCCEED)) goto out_up_write; - folio = page_folio(hpage); /* * The smp_wmb() inside __folio_mark_uptodate() ensures the * copy_huge_page writes become visible before the set_pmd_at() @@ -1786,7 +1786,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, struct page *hpage; struct page *page; struct page *tmp; - struct folio *folio; + struct folio *folio, *new_folio; pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); @@ -1797,7 +1797,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); - result = alloc_charge_hpage(&hpage, mm, cc); + result = alloc_charge_folio(&new_folio, mm, cc); + hpage = &new_folio->page; if (result != SCAN_SUCCEED) goto out; From 36249a0b212e62990108bb04533875d1fdb20e4c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Apr 2024 18:18:34 +0100 Subject: [PATCH 096/207] khugepaged: remove hpage from collapse_file() [ Upstream commit 610ff817b981921213ae51e5c5f38c76c6f0405e ] Use new_folio throughout where we had been using hpage. Link: https://lkml.kernel.org/r/20240403171838.1445826-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Vishal Moola (Oracle) Signed-off-by: Andrew Morton Stable-dep-of: 37f0b47c5143 ("mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point") Signed-off-by: Sasha Levin --- include/trace/events/huge_memory.h | 6 +-- mm/khugepaged.c | 77 +++++++++++++++--------------- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 6e2ef1d4b002..dc6eeef2d3da 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -207,10 +207,10 @@ TRACE_EVENT(mm_khugepaged_scan_file, ); TRACE_EVENT(mm_khugepaged_collapse_file, - TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index, + TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, bool is_shmem, unsigned long addr, struct file *file, int nr, int result), - TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result), + TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, hpfn) @@ -224,7 +224,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, TP_fast_assign( __entry->mm = mm; - __entry->hpfn = hpage ? page_to_pfn(hpage) : -1; + __entry->hpfn = new_folio ? folio_pfn(new_folio) : -1; __entry->index = index; __entry->addr = addr; __entry->is_shmem = is_shmem; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b197323450b5..4b00592548f5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1783,30 +1783,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, struct collapse_control *cc) { struct address_space *mapping = file->f_mapping; - struct page *hpage; struct page *page; - struct page *tmp; + struct page *tmp, *dst; struct folio *folio, *new_folio; pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); - int nr = 0; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); result = alloc_charge_folio(&new_folio, mm, cc); - hpage = &new_folio->page; if (result != SCAN_SUCCEED) goto out; - __SetPageLocked(hpage); + __folio_set_locked(new_folio); if (is_shmem) - __SetPageSwapBacked(hpage); - hpage->index = start; - hpage->mapping = mapping; + __folio_set_swapbacked(new_folio); + new_folio->index = start; + new_folio->mapping = mapping; /* * Ensure we have slots for all the pages in the range. This is @@ -2039,20 +2036,24 @@ xa_unlocked: * The old pages are locked, so they won't change anymore. */ index = start; + dst = folio_page(new_folio, 0); list_for_each_entry(page, &pagelist, lru) { while (index < page->index) { - clear_highpage(hpage + (index % HPAGE_PMD_NR)); + clear_highpage(dst); index++; + dst++; } - if (copy_mc_highpage(hpage + (page->index % HPAGE_PMD_NR), page) > 0) { + if (copy_mc_highpage(dst, page) > 0) { result = SCAN_COPY_MC; goto rollback; } index++; + dst++; } while (index < end) { - clear_highpage(hpage + (index % HPAGE_PMD_NR)); + clear_highpage(dst); index++; + dst++; } if (nr_none) { @@ -2080,16 +2081,17 @@ xa_unlocked: } /* - * If userspace observed a missing page in a VMA with a MODE_MISSING - * userfaultfd, then it might expect a UFFD_EVENT_PAGEFAULT for that - * page. If so, we need to roll back to avoid suppressing such an - * event. Since wp/minor userfaultfds don't give userspace any - * guarantees that the kernel doesn't fill a missing page with a zero - * page, so they don't matter here. + * If userspace observed a missing page in a VMA with + * a MODE_MISSING userfaultfd, then it might expect a + * UFFD_EVENT_PAGEFAULT for that page. If so, we need to + * roll back to avoid suppressing such an event. Since + * wp/minor userfaultfds don't give userspace any + * guarantees that the kernel doesn't fill a missing + * page with a zero page, so they don't matter here. * - * Any userfaultfds registered after this point will not be able to - * observe any missing pages due to the previously inserted retry - * entries. + * Any userfaultfds registered after this point will + * not be able to observe any missing pages due to the + * previously inserted retry entries. */ vma_interval_tree_foreach(vma, &mapping->i_mmap, start, end) { if (userfaultfd_missing(vma)) { @@ -2114,33 +2116,32 @@ immap_locked: xas_lock_irq(&xas); } - folio = page_folio(hpage); - nr = folio_nr_pages(folio); if (is_shmem) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); + __lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); else - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); + __lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR); if (nr_none) { - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_none); + __lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_none); /* nr_none is always 0 for non-shmem. */ - __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_none); + __lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_none); } /* - * Mark hpage as uptodate before inserting it into the page cache so - * that it isn't mistaken for an fallocated but unwritten page. + * Mark new_folio as uptodate before inserting it into the + * page cache so that it isn't mistaken for an fallocated but + * unwritten page. */ - folio_mark_uptodate(folio); - folio_ref_add(folio, HPAGE_PMD_NR - 1); + folio_mark_uptodate(new_folio); + folio_ref_add(new_folio, HPAGE_PMD_NR - 1); if (is_shmem) - folio_mark_dirty(folio); - folio_add_lru(folio); + folio_mark_dirty(new_folio); + folio_add_lru(new_folio); /* Join all the small entries into a single multi-index entry. */ xas_set_order(&xas, start, HPAGE_PMD_ORDER); - xas_store(&xas, folio); + xas_store(&xas, new_folio); WARN_ON_ONCE(xas_error(&xas)); xas_unlock_irq(&xas); @@ -2151,7 +2152,7 @@ immap_locked: retract_page_tables(mapping, start); if (cc && !cc->is_khugepaged) result = SCAN_PTE_MAPPED_HUGEPAGE; - folio_unlock(folio); + folio_unlock(new_folio); /* * The collapse has succeeded, so free the old pages. @@ -2196,13 +2197,13 @@ rollback: smp_mb(); } - hpage->mapping = NULL; + new_folio->mapping = NULL; - unlock_page(hpage); - put_page(hpage); + folio_unlock(new_folio); + folio_put(new_folio); out: VM_BUG_ON(!list_empty(&pagelist)); - trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result); + trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result); return result; } From 5f029be65dca9b7ffd80a3293dc934ffe0ede6b5 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 11 Oct 2024 18:17:02 -0700 Subject: [PATCH 097/207] mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point [ Upstream commit 37f0b47c5143c2957909ced44fc09ffb118c99f7 ] The "addr" and "is_shmem" arguments have different order in TP_PROTO and TP_ARGS. This resulted in the incorrect trace result: text-hugepage-644429 [276] 392092.878683: mm_khugepaged_collapse_file: mm=0xffff20025d52c440, hpage_pfn=0x200678c00, index=512, addr=1, is_shmem=0, filename=text-hugepage, nr=512, result=failed The value of "addr" is wrong because it was treated as bool value, the type of is_shmem. Fix the order in TP_PROTO to keep "addr" is before "is_shmem" since the original patch review suggested this order to achieve best packing. And use "lx" for "addr" instead of "ld" in TP_printk because address is typically shown in hex. After the fix, the trace result looks correct: text-hugepage-7291 [004] 128.627251: mm_khugepaged_collapse_file: mm=0xffff0001328f9500, hpage_pfn=0x20016ea00, index=512, addr=0x400000, is_shmem=0, filename=text-hugepage, nr=512, result=failed Link: https://lkml.kernel.org/r/20241012011702.1084846-1-yang@os.amperecomputing.com Fixes: 4c9473e87e75 ("mm/khugepaged: add tracepoint to collapse_file()") Signed-off-by: Yang Shi Cc: Gautam Menghani Cc: Steven Rostedt (Google) Cc: [6.2+] Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- include/trace/events/huge_memory.h | 4 ++-- mm/khugepaged.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index dc6eeef2d3da..37f2443b3cdb 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -208,7 +208,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TRACE_EVENT(mm_khugepaged_collapse_file, TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, - bool is_shmem, unsigned long addr, struct file *file, + unsigned long addr, bool is_shmem, struct file *file, int nr, int result), TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( @@ -233,7 +233,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, __entry->result = result; ), - TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", + TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%lx, is_shmem=%d, filename=%s, nr=%d, result=%s", __entry->mm, __entry->hpfn, __entry->index, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4b00592548f5..a87cfe1d4b7b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2203,7 +2203,7 @@ rollback: folio_put(new_folio); out: VM_BUG_ON(!list_empty(&pagelist)); - trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result); + trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem, file, HPAGE_PMD_NR, result); return result; } From fe19d913d2c85bcbfbb8a379096c0b7295007c2b Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Thu, 3 Oct 2024 23:04:52 +0200 Subject: [PATCH 098/207] iio: adc: ti-lmp92064: add missing select IIO_(TRIGGERED_)BUFFER in Kconfig [ Upstream commit a985576af824426e33100554a5958a6beda60a13 ] This driver makes use of triggered buffers, but does not select the required modules. Add the missing 'select IIO_BUFFER' and 'select IIO_TRIGGERED_BUFFER'. Fixes: 6c7bc1d27bb2 ("iio: adc: ti-lmp92064: add buffering support") Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20241003-iio-select-v1-6-67c0385197cd@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index e46817cb5581..6dee3b686eff 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -1335,6 +1335,8 @@ config TI_LMP92064 tristate "Texas Instruments LMP92064 ADC driver" depends on SPI select REGMAP_SPI + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for the LMP92064 Precision Current and Voltage sensor. From 32bd1720ede688eb66ad39b3306d9bc51fabbf4b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Aug 2024 12:35:40 +0200 Subject: [PATCH 099/207] xhci: dbgtty: remove kfifo_out() wrapper [ Upstream commit 2b217514436744dd98c4d9fa48d60610f9f67d61 ] There is no need to check against kfifo_len() before kfifo_out(). Just ask the latter for data and it tells how much it retrieved. Or returns 0 in case there are no more. Signed-off-by: Jiri Slaby (SUSE) Cc: Mathias Nyman Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/20240808103549.429349-5-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 30c9ae5ece8e ("xhci: dbc: honor usb transfer size boundaries.") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-dbgtty.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index b74e98e94393..64ea96494997 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -24,19 +24,6 @@ static inline struct dbc_port *dbc_to_port(struct xhci_dbc *dbc) return dbc->priv; } -static unsigned int -dbc_send_packet(struct dbc_port *port, char *packet, unsigned int size) -{ - unsigned int len; - - len = kfifo_len(&port->write_fifo); - if (len < size) - size = len; - if (size != 0) - size = kfifo_out(&port->write_fifo, packet, size); - return size; -} - static int dbc_start_tx(struct dbc_port *port) __releases(&port->port_lock) __acquires(&port->port_lock) @@ -49,7 +36,7 @@ static int dbc_start_tx(struct dbc_port *port) while (!list_empty(pool)) { req = list_entry(pool->next, struct dbc_request, list_pool); - len = dbc_send_packet(port, req->buf, DBC_MAX_PACKET); + len = kfifo_out(&port->write_fifo, req->buf, DBC_MAX_PACKET); if (len == 0) break; do_tty_wake = true; From 7cbcb40d46999ab0d9634f3082853a17d8222bf4 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Aug 2024 12:35:41 +0200 Subject: [PATCH 100/207] xhci: dbgtty: use kfifo from tty_port struct [ Upstream commit 866025f0237609532bc8e4af5ef4d7252d3b55b6 ] There is no need to define one in a custom structure. The tty_port one is free to use. Signed-off-by: Jiri Slaby (SUSE) Cc: Mathias Nyman Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/20240808103549.429349-6-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 30c9ae5ece8e ("xhci: dbc: honor usb transfer size boundaries.") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-dbgcap.h | 1 - drivers/usb/host/xhci-dbgtty.c | 17 +++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 51a7ab3ba0ca..54fafebb7bd1 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -108,7 +108,6 @@ struct dbc_port { struct tasklet_struct push; struct list_head write_pool; - struct kfifo write_fifo; bool registered; }; diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 64ea96494997..881f5a7e6e0e 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -36,7 +36,7 @@ static int dbc_start_tx(struct dbc_port *port) while (!list_empty(pool)) { req = list_entry(pool->next, struct dbc_request, list_pool); - len = kfifo_out(&port->write_fifo, req->buf, DBC_MAX_PACKET); + len = kfifo_out(&port->port.xmit_fifo, req->buf, DBC_MAX_PACKET); if (len == 0) break; do_tty_wake = true; @@ -203,7 +203,7 @@ static ssize_t dbc_tty_write(struct tty_struct *tty, const u8 *buf, spin_lock_irqsave(&port->port_lock, flags); if (count) - count = kfifo_in(&port->write_fifo, buf, count); + count = kfifo_in(&port->port.xmit_fifo, buf, count); dbc_start_tx(port); spin_unlock_irqrestore(&port->port_lock, flags); @@ -217,7 +217,7 @@ static int dbc_tty_put_char(struct tty_struct *tty, u8 ch) int status; spin_lock_irqsave(&port->port_lock, flags); - status = kfifo_put(&port->write_fifo, ch); + status = kfifo_put(&port->port.xmit_fifo, ch); spin_unlock_irqrestore(&port->port_lock, flags); return status; @@ -240,7 +240,7 @@ static unsigned int dbc_tty_write_room(struct tty_struct *tty) unsigned int room; spin_lock_irqsave(&port->port_lock, flags); - room = kfifo_avail(&port->write_fifo); + room = kfifo_avail(&port->port.xmit_fifo); spin_unlock_irqrestore(&port->port_lock, flags); return room; @@ -253,7 +253,7 @@ static unsigned int dbc_tty_chars_in_buffer(struct tty_struct *tty) unsigned int chars; spin_lock_irqsave(&port->port_lock, flags); - chars = kfifo_len(&port->write_fifo); + chars = kfifo_len(&port->port.xmit_fifo); spin_unlock_irqrestore(&port->port_lock, flags); return chars; @@ -411,7 +411,8 @@ static int xhci_dbc_tty_register_device(struct xhci_dbc *dbc) goto err_idr; } - ret = kfifo_alloc(&port->write_fifo, DBC_WRITE_BUF_SIZE, GFP_KERNEL); + ret = kfifo_alloc(&port->port.xmit_fifo, DBC_WRITE_BUF_SIZE, + GFP_KERNEL); if (ret) goto err_exit_port; @@ -440,7 +441,7 @@ err_free_requests: xhci_dbc_free_requests(&port->read_pool); xhci_dbc_free_requests(&port->write_pool); err_free_fifo: - kfifo_free(&port->write_fifo); + kfifo_free(&port->port.xmit_fifo); err_exit_port: idr_remove(&dbc_tty_minors, port->minor); err_idr: @@ -465,7 +466,7 @@ static void xhci_dbc_tty_unregister_device(struct xhci_dbc *dbc) idr_remove(&dbc_tty_minors, port->minor); mutex_unlock(&dbc_tty_minors_lock); - kfifo_free(&port->write_fifo); + kfifo_free(&port->port.xmit_fifo); xhci_dbc_free_requests(&port->read_pool); xhci_dbc_free_requests(&port->read_queue); xhci_dbc_free_requests(&port->write_pool); From c691c2adeb75b3e385dff3efcaef978e08698bf8 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 16 Oct 2024 17:00:00 +0300 Subject: [PATCH 101/207] xhci: dbc: honor usb transfer size boundaries. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 30c9ae5ece8ecd69d36e6912c2c0896418f2468c ] Treat each completed full size write to /dev/ttyDBC0 as a separate usb transfer. Make sure the size of the TRBs matches the size of the tty write by first queuing as many max packet size TRBs as possible up to the last TRB which will be cut short to match the size of the tty write. This solves an issue where userspace writes several transfers back to back via /dev/ttyDBC0 into a kfifo before dbgtty can find available request to turn that kfifo data into TRBs on the transfer ring. The boundary between transfer was lost as xhci-dbgtty then turned everyting in the kfifo into as many 'max packet size' TRBs as possible. DbC would then send more data to the host than intended for that transfer, causing host to issue a babble error. Refuse to write more data to kfifo until previous tty write data is turned into properly sized TRBs with data size boundaries matching tty write size Tested-by: Uday M Bhat Tested-by: Łukasz Bartosik Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20241016140000.783905-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-dbgcap.h | 1 + drivers/usb/host/xhci-dbgtty.c | 55 ++++++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 54fafebb7bd1..76170d7a7e7c 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -108,6 +108,7 @@ struct dbc_port { struct tasklet_struct push; struct list_head write_pool; + unsigned int tx_boundary; bool registered; }; diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 881f5a7e6e0e..0266c2f5bc0d 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -24,6 +24,29 @@ static inline struct dbc_port *dbc_to_port(struct xhci_dbc *dbc) return dbc->priv; } +static unsigned int +dbc_kfifo_to_req(struct dbc_port *port, char *packet) +{ + unsigned int len; + + len = kfifo_len(&port->port.xmit_fifo); + + if (len == 0) + return 0; + + len = min(len, DBC_MAX_PACKET); + + if (port->tx_boundary) + len = min(port->tx_boundary, len); + + len = kfifo_out(&port->port.xmit_fifo, packet, len); + + if (port->tx_boundary) + port->tx_boundary -= len; + + return len; +} + static int dbc_start_tx(struct dbc_port *port) __releases(&port->port_lock) __acquires(&port->port_lock) @@ -36,7 +59,7 @@ static int dbc_start_tx(struct dbc_port *port) while (!list_empty(pool)) { req = list_entry(pool->next, struct dbc_request, list_pool); - len = kfifo_out(&port->port.xmit_fifo, req->buf, DBC_MAX_PACKET); + len = dbc_kfifo_to_req(port, req->buf); if (len == 0) break; do_tty_wake = true; @@ -200,14 +223,32 @@ static ssize_t dbc_tty_write(struct tty_struct *tty, const u8 *buf, { struct dbc_port *port = tty->driver_data; unsigned long flags; + unsigned int written = 0; spin_lock_irqsave(&port->port_lock, flags); - if (count) - count = kfifo_in(&port->port.xmit_fifo, buf, count); - dbc_start_tx(port); + + /* + * Treat tty write as one usb transfer. Make sure the writes are turned + * into TRB request having the same size boundaries as the tty writes. + * Don't add data to kfifo before previous write is turned into TRBs + */ + if (port->tx_boundary) { + spin_unlock_irqrestore(&port->port_lock, flags); + return 0; + } + + if (count) { + written = kfifo_in(&port->port.xmit_fifo, buf, count); + + if (written == count) + port->tx_boundary = kfifo_len(&port->port.xmit_fifo); + + dbc_start_tx(port); + } + spin_unlock_irqrestore(&port->port_lock, flags); - return count; + return written; } static int dbc_tty_put_char(struct tty_struct *tty, u8 ch) @@ -241,6 +282,10 @@ static unsigned int dbc_tty_write_room(struct tty_struct *tty) spin_lock_irqsave(&port->port_lock, flags); room = kfifo_avail(&port->port.xmit_fifo); + + if (port->tx_boundary) + room = 0; + spin_unlock_irqrestore(&port->port_lock, flags); return room; From 3af6c72854138d241e8a80560b444dd6752481b1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 13 Dec 2023 16:42:32 +0000 Subject: [PATCH 102/207] usb: gadget: f_uac2: Replace snprintf() with the safer scnprintf() variant [ Upstream commit 60034e0aedf507888c4a880f57011bb7f5d7700c ] There is a general misunderstanding amongst engineers that {v}snprintf() returns the length of the data *actually* encoded into the destination array. However, as per the C99 standard {v}snprintf() really returns the length of the data that *would have been* written if there were enough space for it. This misunderstanding has led to buffer-overruns in the past. It's generally considered safer to use the {v}scnprintf() variants in their place (or even sprintf() in simple cases). So let's do that. Link: https://lwn.net/Articles/69419/ Link: https://github.com/KSPP/linux/issues/105 Cc: James Gruber Cc: Yadwinder Singh Cc: Jaswinder Singh Cc: Ruslan Bilovol Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20231213164246.1021885-4-lee@kernel.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 9499327714de ("usb: gadget: f_uac2: fix return value for UAC2_ATTRIBUTE_STRING store") Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_uac2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 0219cd79493a..55a4f07bc9cc 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -2042,7 +2042,7 @@ static ssize_t f_uac2_opts_##name##_show(struct config_item *item, \ int result; \ \ mutex_lock(&opts->lock); \ - result = snprintf(page, sizeof(opts->name), "%s", opts->name); \ + result = scnprintf(page, sizeof(opts->name), "%s", opts->name); \ mutex_unlock(&opts->lock); \ \ return result; \ @@ -2060,7 +2060,7 @@ static ssize_t f_uac2_opts_##name##_store(struct config_item *item, \ goto end; \ } \ \ - ret = snprintf(opts->name, min(sizeof(opts->name), len), \ + ret = scnprintf(opts->name, min(sizeof(opts->name), len), \ "%s", page); \ \ end: \ @@ -2178,7 +2178,7 @@ static struct usb_function_instance *afunc_alloc_inst(void) opts->req_number = UAC2_DEF_REQ_NUM; opts->fb_max = FBACK_FAST_MAX; - snprintf(opts->function_name, sizeof(opts->function_name), "Source/Sink"); + scnprintf(opts->function_name, sizeof(opts->function_name), "Source/Sink"); return &opts->func_inst; } From 0a2794e244d8704e876a5a34469e82e78cd5fc59 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 8 Jul 2024 15:25:53 +0100 Subject: [PATCH 103/207] usb: gadget: f_uac2: fix non-newline-terminated function name [ Upstream commit e60284b63245b84c3ae352427ed5ff8b79266b91 ] Most writes to configfs handle an optional newline, but do not require it. By using the number of bytes written as the limit for scnprintf() it is guaranteed that the final character in the buffer will be overwritten. This is expected if it is a newline but is undesirable when a string is written "as-is" (as libusbgx does, for example). Update the store function to strip an optional newline, matching the behaviour of usb_string_copy(). Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20240708142553.3995022-1-jkeeping@inmusicbrands.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 9499327714de ("usb: gadget: f_uac2: fix return value for UAC2_ATTRIBUTE_STRING store") Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_uac2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 55a4f07bc9cc..79d1f87c6cc5 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -2060,7 +2060,10 @@ static ssize_t f_uac2_opts_##name##_store(struct config_item *item, \ goto end; \ } \ \ - ret = scnprintf(opts->name, min(sizeof(opts->name), len), \ + if (len && page[len - 1] == '\n') \ + len--; \ + \ + ret = scnprintf(opts->name, min(sizeof(opts->name), len + 1), \ "%s", page); \ \ end: \ From ecc8a9502b3c878ddf5479d796c5677767452034 Mon Sep 17 00:00:00 2001 From: Kevin Groeneveld Date: Sun, 6 Oct 2024 19:26:31 -0400 Subject: [PATCH 104/207] usb: gadget: f_uac2: fix return value for UAC2_ATTRIBUTE_STRING store [ Upstream commit 9499327714de7bc5cf6c792112c1474932d8ad31 ] The configfs store callback should return the number of bytes consumed not the total number of bytes we actually stored. These could differ if for example the passed in string had a newline we did not store. If the returned value does not match the number of bytes written the writer might assume a failure or keep trying to write the remaining bytes. For example the following command will hang trying to write the final newline over and over again (tested on bash 2.05b): echo foo > function_name Fixes: 993a44fa85c1 ("usb: gadget: f_uac2: allow changing interface name via configfs") Cc: stable Signed-off-by: Kevin Groeneveld Link: https://lore.kernel.org/r/20241006232637.4267-1-kgroeneveld@lenbrook.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_uac2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 79d1f87c6cc5..b3dc5f5164f4 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -2052,7 +2052,7 @@ static ssize_t f_uac2_opts_##name##_store(struct config_item *item, \ const char *page, size_t len) \ { \ struct f_uac2_opts *opts = to_f_uac2_opts(item); \ - int ret = 0; \ + int ret = len; \ \ mutex_lock(&opts->lock); \ if (opts->refcnt) { \ @@ -2063,8 +2063,8 @@ static ssize_t f_uac2_opts_##name##_store(struct config_item *item, \ if (len && page[len - 1] == '\n') \ len--; \ \ - ret = scnprintf(opts->name, min(sizeof(opts->name), len + 1), \ - "%s", page); \ + scnprintf(opts->name, min(sizeof(opts->name), len + 1), \ + "%s", page); \ \ end: \ mutex_unlock(&opts->lock); \ From 66e56efc45cb6b733cf968d20822c0cd59465bca Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 24 Jan 2024 10:25:23 -0500 Subject: [PATCH 105/207] XHCI: Separate PORT and CAPs macros into dedicated file [ Upstream commit c35ba0ac48355df1d11fcce85945f76c42d250ac ] Split the PORT and CAPs macro definitions into a separate file to facilitate sharing with other files without the need to include the entire xhci.h. Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240124152525.3910311-2-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 705e3ce37bcc ("usb: dwc3: core: Fix system suspend on TI AM62 platforms") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-caps.h | 85 ++++++++++++ drivers/usb/host/xhci-port.h | 176 +++++++++++++++++++++++ drivers/usb/host/xhci.h | 262 +---------------------------------- 3 files changed, 264 insertions(+), 259 deletions(-) create mode 100644 drivers/usb/host/xhci-caps.h create mode 100644 drivers/usb/host/xhci-port.h diff --git a/drivers/usb/host/xhci-caps.h b/drivers/usb/host/xhci-caps.h new file mode 100644 index 000000000000..9e94cebf4a56 --- /dev/null +++ b/drivers/usb/host/xhci-caps.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* hc_capbase bitmasks */ +/* bits 7:0 - how long is the Capabilities register */ +#define HC_LENGTH(p) XHCI_HC_LENGTH(p) +/* bits 31:16 */ +#define HC_VERSION(p) (((p) >> 16) & 0xffff) + +/* HCSPARAMS1 - hcs_params1 - bitmasks */ +/* bits 0:7, Max Device Slots */ +#define HCS_MAX_SLOTS(p) (((p) >> 0) & 0xff) +#define HCS_SLOTS_MASK 0xff +/* bits 8:18, Max Interrupters */ +#define HCS_MAX_INTRS(p) (((p) >> 8) & 0x7ff) +/* bits 24:31, Max Ports - max value is 0x7F = 127 ports */ +#define HCS_MAX_PORTS(p) (((p) >> 24) & 0x7f) + +/* HCSPARAMS2 - hcs_params2 - bitmasks */ +/* bits 0:3, frames or uframes that SW needs to queue transactions + * ahead of the HW to meet periodic deadlines */ +#define HCS_IST(p) (((p) >> 0) & 0xf) +/* bits 4:7, max number of Event Ring segments */ +#define HCS_ERST_MAX(p) (((p) >> 4) & 0xf) +/* bits 21:25 Hi 5 bits of Scratchpad buffers SW must allocate for the HW */ +/* bit 26 Scratchpad restore - for save/restore HW state - not used yet */ +/* bits 27:31 Lo 5 bits of Scratchpad buffers SW must allocate for the HW */ +#define HCS_MAX_SCRATCHPAD(p) ((((p) >> 16) & 0x3e0) | (((p) >> 27) & 0x1f)) + +/* HCSPARAMS3 - hcs_params3 - bitmasks */ +/* bits 0:7, Max U1 to U0 latency for the roothub ports */ +#define HCS_U1_LATENCY(p) (((p) >> 0) & 0xff) +/* bits 16:31, Max U2 to U0 latency for the roothub ports */ +#define HCS_U2_LATENCY(p) (((p) >> 16) & 0xffff) + +/* HCCPARAMS - hcc_params - bitmasks */ +/* true: HC can use 64-bit address pointers */ +#define HCC_64BIT_ADDR(p) ((p) & (1 << 0)) +/* true: HC can do bandwidth negotiation */ +#define HCC_BANDWIDTH_NEG(p) ((p) & (1 << 1)) +/* true: HC uses 64-byte Device Context structures + * FIXME 64-byte context structures aren't supported yet. + */ +#define HCC_64BYTE_CONTEXT(p) ((p) & (1 << 2)) +/* true: HC has port power switches */ +#define HCC_PPC(p) ((p) & (1 << 3)) +/* true: HC has port indicators */ +#define HCS_INDICATOR(p) ((p) & (1 << 4)) +/* true: HC has Light HC Reset Capability */ +#define HCC_LIGHT_RESET(p) ((p) & (1 << 5)) +/* true: HC supports latency tolerance messaging */ +#define HCC_LTC(p) ((p) & (1 << 6)) +/* true: no secondary Stream ID Support */ +#define HCC_NSS(p) ((p) & (1 << 7)) +/* true: HC supports Stopped - Short Packet */ +#define HCC_SPC(p) ((p) & (1 << 9)) +/* true: HC has Contiguous Frame ID Capability */ +#define HCC_CFC(p) ((p) & (1 << 11)) +/* Max size for Primary Stream Arrays - 2^(n+1), where n is bits 12:15 */ +#define HCC_MAX_PSA(p) (1 << ((((p) >> 12) & 0xf) + 1)) +/* Extended Capabilities pointer from PCI base - section 5.3.6 */ +#define HCC_EXT_CAPS(p) XHCI_HCC_EXT_CAPS(p) + +#define CTX_SIZE(_hcc) (HCC_64BYTE_CONTEXT(_hcc) ? 64 : 32) + +/* db_off bitmask - bits 0:1 reserved */ +#define DBOFF_MASK (~0x3) + +/* run_regs_off bitmask - bits 0:4 reserved */ +#define RTSOFF_MASK (~0x1f) + +/* HCCPARAMS2 - hcc_params2 - bitmasks */ +/* true: HC supports U3 entry Capability */ +#define HCC2_U3C(p) ((p) & (1 << 0)) +/* true: HC supports Configure endpoint command Max exit latency too large */ +#define HCC2_CMC(p) ((p) & (1 << 1)) +/* true: HC supports Force Save context Capability */ +#define HCC2_FSC(p) ((p) & (1 << 2)) +/* true: HC supports Compliance Transition Capability */ +#define HCC2_CTC(p) ((p) & (1 << 3)) +/* true: HC support Large ESIT payload Capability > 48k */ +#define HCC2_LEC(p) ((p) & (1 << 4)) +/* true: HC support Configuration Information Capability */ +#define HCC2_CIC(p) ((p) & (1 << 5)) +/* true: HC support Extended TBC Capability, Isoc burst count > 65535 */ +#define HCC2_ETC(p) ((p) & (1 << 6)) diff --git a/drivers/usb/host/xhci-port.h b/drivers/usb/host/xhci-port.h new file mode 100644 index 000000000000..f19efb966d18 --- /dev/null +++ b/drivers/usb/host/xhci-port.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* PORTSC - Port Status and Control Register - port_status_base bitmasks */ +/* true: device connected */ +#define PORT_CONNECT (1 << 0) +/* true: port enabled */ +#define PORT_PE (1 << 1) +/* bit 2 reserved and zeroed */ +/* true: port has an over-current condition */ +#define PORT_OC (1 << 3) +/* true: port reset signaling asserted */ +#define PORT_RESET (1 << 4) +/* Port Link State - bits 5:8 + * A read gives the current link PM state of the port, + * a write with Link State Write Strobe set sets the link state. + */ +#define PORT_PLS_MASK (0xf << 5) +#define XDEV_U0 (0x0 << 5) +#define XDEV_U1 (0x1 << 5) +#define XDEV_U2 (0x2 << 5) +#define XDEV_U3 (0x3 << 5) +#define XDEV_DISABLED (0x4 << 5) +#define XDEV_RXDETECT (0x5 << 5) +#define XDEV_INACTIVE (0x6 << 5) +#define XDEV_POLLING (0x7 << 5) +#define XDEV_RECOVERY (0x8 << 5) +#define XDEV_HOT_RESET (0x9 << 5) +#define XDEV_COMP_MODE (0xa << 5) +#define XDEV_TEST_MODE (0xb << 5) +#define XDEV_RESUME (0xf << 5) + +/* true: port has power (see HCC_PPC) */ +#define PORT_POWER (1 << 9) +/* bits 10:13 indicate device speed: + * 0 - undefined speed - port hasn't be initialized by a reset yet + * 1 - full speed + * 2 - low speed + * 3 - high speed + * 4 - super speed + * 5-15 reserved + */ +#define DEV_SPEED_MASK (0xf << 10) +#define XDEV_FS (0x1 << 10) +#define XDEV_LS (0x2 << 10) +#define XDEV_HS (0x3 << 10) +#define XDEV_SS (0x4 << 10) +#define XDEV_SSP (0x5 << 10) +#define DEV_UNDEFSPEED(p) (((p) & DEV_SPEED_MASK) == (0x0<<10)) +#define DEV_FULLSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_FS) +#define DEV_LOWSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_LS) +#define DEV_HIGHSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_HS) +#define DEV_SUPERSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_SS) +#define DEV_SUPERSPEEDPLUS(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP) +#define DEV_SUPERSPEED_ANY(p) (((p) & DEV_SPEED_MASK) >= XDEV_SS) +#define DEV_PORT_SPEED(p) (((p) >> 10) & 0x0f) + +/* Bits 20:23 in the Slot Context are the speed for the device */ +#define SLOT_SPEED_FS (XDEV_FS << 10) +#define SLOT_SPEED_LS (XDEV_LS << 10) +#define SLOT_SPEED_HS (XDEV_HS << 10) +#define SLOT_SPEED_SS (XDEV_SS << 10) +#define SLOT_SPEED_SSP (XDEV_SSP << 10) +/* Port Indicator Control */ +#define PORT_LED_OFF (0 << 14) +#define PORT_LED_AMBER (1 << 14) +#define PORT_LED_GREEN (2 << 14) +#define PORT_LED_MASK (3 << 14) +/* Port Link State Write Strobe - set this when changing link state */ +#define PORT_LINK_STROBE (1 << 16) +/* true: connect status change */ +#define PORT_CSC (1 << 17) +/* true: port enable change */ +#define PORT_PEC (1 << 18) +/* true: warm reset for a USB 3.0 device is done. A "hot" reset puts the port + * into an enabled state, and the device into the default state. A "warm" reset + * also resets the link, forcing the device through the link training sequence. + * SW can also look at the Port Reset register to see when warm reset is done. + */ +#define PORT_WRC (1 << 19) +/* true: over-current change */ +#define PORT_OCC (1 << 20) +/* true: reset change - 1 to 0 transition of PORT_RESET */ +#define PORT_RC (1 << 21) +/* port link status change - set on some port link state transitions: + * Transition Reason + * ------------------------------------------------------------------------------ + * - U3 to Resume Wakeup signaling from a device + * - Resume to Recovery to U0 USB 3.0 device resume + * - Resume to U0 USB 2.0 device resume + * - U3 to Recovery to U0 Software resume of USB 3.0 device complete + * - U3 to U0 Software resume of USB 2.0 device complete + * - U2 to U0 L1 resume of USB 2.1 device complete + * - U0 to U0 (???) L1 entry rejection by USB 2.1 device + * - U0 to disabled L1 entry error with USB 2.1 device + * - Any state to inactive Error on USB 3.0 port + */ +#define PORT_PLC (1 << 22) +/* port configure error change - port failed to configure its link partner */ +#define PORT_CEC (1 << 23) +#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ + PORT_RC | PORT_PLC | PORT_CEC) + + +/* Cold Attach Status - xHC can set this bit to report device attached during + * Sx state. Warm port reset should be perfomed to clear this bit and move port + * to connected state. + */ +#define PORT_CAS (1 << 24) +/* wake on connect (enable) */ +#define PORT_WKCONN_E (1 << 25) +/* wake on disconnect (enable) */ +#define PORT_WKDISC_E (1 << 26) +/* wake on over-current (enable) */ +#define PORT_WKOC_E (1 << 27) +/* bits 28:29 reserved */ +/* true: device is non-removable - for USB 3.0 roothub emulation */ +#define PORT_DEV_REMOVE (1 << 30) +/* Initiate a warm port reset - complete when PORT_WRC is '1' */ +#define PORT_WR (1 << 31) + +/* We mark duplicate entries with -1 */ +#define DUPLICATE_ENTRY ((u8)(-1)) + +/* Port Power Management Status and Control - port_power_base bitmasks */ +/* Inactivity timer value for transitions into U1, in microseconds. + * Timeout can be up to 127us. 0xFF means an infinite timeout. + */ +#define PORT_U1_TIMEOUT(p) ((p) & 0xff) +#define PORT_U1_TIMEOUT_MASK 0xff +/* Inactivity timer value for transitions into U2 */ +#define PORT_U2_TIMEOUT(p) (((p) & 0xff) << 8) +#define PORT_U2_TIMEOUT_MASK (0xff << 8) +/* Bits 24:31 for port testing */ + +/* USB2 Protocol PORTSPMSC */ +#define PORT_L1S_MASK 7 +#define PORT_L1S_SUCCESS 1 +#define PORT_RWE (1 << 3) +#define PORT_HIRD(p) (((p) & 0xf) << 4) +#define PORT_HIRD_MASK (0xf << 4) +#define PORT_L1DS_MASK (0xff << 8) +#define PORT_L1DS(p) (((p) & 0xff) << 8) +#define PORT_HLE (1 << 16) +#define PORT_TEST_MODE_SHIFT 28 + +/* USB3 Protocol PORTLI Port Link Information */ +#define PORT_RX_LANES(p) (((p) >> 16) & 0xf) +#define PORT_TX_LANES(p) (((p) >> 20) & 0xf) + +/* USB2 Protocol PORTHLPMC */ +#define PORT_HIRDM(p)((p) & 3) +#define PORT_L1_TIMEOUT(p)(((p) & 0xff) << 2) +#define PORT_BESLD(p)(((p) & 0xf) << 10) + +/* use 512 microseconds as USB2 LPM L1 default timeout. */ +#define XHCI_L1_TIMEOUT 512 + +/* Set default HIRD/BESL value to 4 (350/400us) for USB2 L1 LPM resume latency. + * Safe to use with mixed HIRD and BESL systems (host and device) and is used + * by other operating systems. + * + * XHCI 1.0 errata 8/14/12 Table 13 notes: + * "Software should choose xHC BESL/BESLD field values that do not violate a + * device's resume latency requirements, + * e.g. not program values > '4' if BLC = '1' and a HIRD device is attached, + * or not program values < '4' if BLC = '0' and a BESL device is attached. + */ +#define XHCI_DEFAULT_BESL 4 + +/* + * USB3 specification define a 360ms tPollingLFPSTiemout for USB3 ports + * to complete link training. usually link trainig completes much faster + * so check status 10 times with 36ms sleep in places we need to wait for + * polling to complete. + */ +#define XHCI_PORT_POLLING_LFPS_TIME 36 diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 7754ed55d220..f2190d121233 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -23,6 +23,9 @@ #include "xhci-ext-caps.h" #include "pci-quirks.h" +#include "xhci-port.h" +#include "xhci-caps.h" + /* max buffer size for trace and debug messages */ #define XHCI_MSG_MAX 500 @@ -63,90 +66,6 @@ struct xhci_cap_regs { /* Reserved up to (CAPLENGTH - 0x1C) */ }; -/* hc_capbase bitmasks */ -/* bits 7:0 - how long is the Capabilities register */ -#define HC_LENGTH(p) XHCI_HC_LENGTH(p) -/* bits 31:16 */ -#define HC_VERSION(p) (((p) >> 16) & 0xffff) - -/* HCSPARAMS1 - hcs_params1 - bitmasks */ -/* bits 0:7, Max Device Slots */ -#define HCS_MAX_SLOTS(p) (((p) >> 0) & 0xff) -#define HCS_SLOTS_MASK 0xff -/* bits 8:18, Max Interrupters */ -#define HCS_MAX_INTRS(p) (((p) >> 8) & 0x7ff) -/* bits 24:31, Max Ports - max value is 0x7F = 127 ports */ -#define HCS_MAX_PORTS(p) (((p) >> 24) & 0x7f) - -/* HCSPARAMS2 - hcs_params2 - bitmasks */ -/* bits 0:3, frames or uframes that SW needs to queue transactions - * ahead of the HW to meet periodic deadlines */ -#define HCS_IST(p) (((p) >> 0) & 0xf) -/* bits 4:7, max number of Event Ring segments */ -#define HCS_ERST_MAX(p) (((p) >> 4) & 0xf) -/* bits 21:25 Hi 5 bits of Scratchpad buffers SW must allocate for the HW */ -/* bit 26 Scratchpad restore - for save/restore HW state - not used yet */ -/* bits 27:31 Lo 5 bits of Scratchpad buffers SW must allocate for the HW */ -#define HCS_MAX_SCRATCHPAD(p) ((((p) >> 16) & 0x3e0) | (((p) >> 27) & 0x1f)) - -/* HCSPARAMS3 - hcs_params3 - bitmasks */ -/* bits 0:7, Max U1 to U0 latency for the roothub ports */ -#define HCS_U1_LATENCY(p) (((p) >> 0) & 0xff) -/* bits 16:31, Max U2 to U0 latency for the roothub ports */ -#define HCS_U2_LATENCY(p) (((p) >> 16) & 0xffff) - -/* HCCPARAMS - hcc_params - bitmasks */ -/* true: HC can use 64-bit address pointers */ -#define HCC_64BIT_ADDR(p) ((p) & (1 << 0)) -/* true: HC can do bandwidth negotiation */ -#define HCC_BANDWIDTH_NEG(p) ((p) & (1 << 1)) -/* true: HC uses 64-byte Device Context structures - * FIXME 64-byte context structures aren't supported yet. - */ -#define HCC_64BYTE_CONTEXT(p) ((p) & (1 << 2)) -/* true: HC has port power switches */ -#define HCC_PPC(p) ((p) & (1 << 3)) -/* true: HC has port indicators */ -#define HCS_INDICATOR(p) ((p) & (1 << 4)) -/* true: HC has Light HC Reset Capability */ -#define HCC_LIGHT_RESET(p) ((p) & (1 << 5)) -/* true: HC supports latency tolerance messaging */ -#define HCC_LTC(p) ((p) & (1 << 6)) -/* true: no secondary Stream ID Support */ -#define HCC_NSS(p) ((p) & (1 << 7)) -/* true: HC supports Stopped - Short Packet */ -#define HCC_SPC(p) ((p) & (1 << 9)) -/* true: HC has Contiguous Frame ID Capability */ -#define HCC_CFC(p) ((p) & (1 << 11)) -/* Max size for Primary Stream Arrays - 2^(n+1), where n is bits 12:15 */ -#define HCC_MAX_PSA(p) (1 << ((((p) >> 12) & 0xf) + 1)) -/* Extended Capabilities pointer from PCI base - section 5.3.6 */ -#define HCC_EXT_CAPS(p) XHCI_HCC_EXT_CAPS(p) - -#define CTX_SIZE(_hcc) (HCC_64BYTE_CONTEXT(_hcc) ? 64 : 32) - -/* db_off bitmask - bits 0:1 reserved */ -#define DBOFF_MASK (~0x3) - -/* run_regs_off bitmask - bits 0:4 reserved */ -#define RTSOFF_MASK (~0x1f) - -/* HCCPARAMS2 - hcc_params2 - bitmasks */ -/* true: HC supports U3 entry Capability */ -#define HCC2_U3C(p) ((p) & (1 << 0)) -/* true: HC supports Configure endpoint command Max exit latency too large */ -#define HCC2_CMC(p) ((p) & (1 << 1)) -/* true: HC supports Force Save context Capability */ -#define HCC2_FSC(p) ((p) & (1 << 2)) -/* true: HC supports Compliance Transition Capability */ -#define HCC2_CTC(p) ((p) & (1 << 3)) -/* true: HC support Large ESIT payload Capability > 48k */ -#define HCC2_LEC(p) ((p) & (1 << 4)) -/* true: HC support Configuration Information Capability */ -#define HCC2_CIC(p) ((p) & (1 << 5)) -/* true: HC support Extended TBC Capability, Isoc burst count > 65535 */ -#define HCC2_ETC(p) ((p) & (1 << 6)) - /* Number of registers per port */ #define NUM_PORT_REGS 4 @@ -292,181 +211,6 @@ struct xhci_op_regs { #define CONFIG_CIE (1 << 9) /* bits 10:31 - reserved and should be preserved */ -/* PORTSC - Port Status and Control Register - port_status_base bitmasks */ -/* true: device connected */ -#define PORT_CONNECT (1 << 0) -/* true: port enabled */ -#define PORT_PE (1 << 1) -/* bit 2 reserved and zeroed */ -/* true: port has an over-current condition */ -#define PORT_OC (1 << 3) -/* true: port reset signaling asserted */ -#define PORT_RESET (1 << 4) -/* Port Link State - bits 5:8 - * A read gives the current link PM state of the port, - * a write with Link State Write Strobe set sets the link state. - */ -#define PORT_PLS_MASK (0xf << 5) -#define XDEV_U0 (0x0 << 5) -#define XDEV_U1 (0x1 << 5) -#define XDEV_U2 (0x2 << 5) -#define XDEV_U3 (0x3 << 5) -#define XDEV_DISABLED (0x4 << 5) -#define XDEV_RXDETECT (0x5 << 5) -#define XDEV_INACTIVE (0x6 << 5) -#define XDEV_POLLING (0x7 << 5) -#define XDEV_RECOVERY (0x8 << 5) -#define XDEV_HOT_RESET (0x9 << 5) -#define XDEV_COMP_MODE (0xa << 5) -#define XDEV_TEST_MODE (0xb << 5) -#define XDEV_RESUME (0xf << 5) - -/* true: port has power (see HCC_PPC) */ -#define PORT_POWER (1 << 9) -/* bits 10:13 indicate device speed: - * 0 - undefined speed - port hasn't be initialized by a reset yet - * 1 - full speed - * 2 - low speed - * 3 - high speed - * 4 - super speed - * 5-15 reserved - */ -#define DEV_SPEED_MASK (0xf << 10) -#define XDEV_FS (0x1 << 10) -#define XDEV_LS (0x2 << 10) -#define XDEV_HS (0x3 << 10) -#define XDEV_SS (0x4 << 10) -#define XDEV_SSP (0x5 << 10) -#define DEV_UNDEFSPEED(p) (((p) & DEV_SPEED_MASK) == (0x0<<10)) -#define DEV_FULLSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_FS) -#define DEV_LOWSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_LS) -#define DEV_HIGHSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_HS) -#define DEV_SUPERSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_SS) -#define DEV_SUPERSPEEDPLUS(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP) -#define DEV_SUPERSPEED_ANY(p) (((p) & DEV_SPEED_MASK) >= XDEV_SS) -#define DEV_PORT_SPEED(p) (((p) >> 10) & 0x0f) - -/* Bits 20:23 in the Slot Context are the speed for the device */ -#define SLOT_SPEED_FS (XDEV_FS << 10) -#define SLOT_SPEED_LS (XDEV_LS << 10) -#define SLOT_SPEED_HS (XDEV_HS << 10) -#define SLOT_SPEED_SS (XDEV_SS << 10) -#define SLOT_SPEED_SSP (XDEV_SSP << 10) -/* Port Indicator Control */ -#define PORT_LED_OFF (0 << 14) -#define PORT_LED_AMBER (1 << 14) -#define PORT_LED_GREEN (2 << 14) -#define PORT_LED_MASK (3 << 14) -/* Port Link State Write Strobe - set this when changing link state */ -#define PORT_LINK_STROBE (1 << 16) -/* true: connect status change */ -#define PORT_CSC (1 << 17) -/* true: port enable change */ -#define PORT_PEC (1 << 18) -/* true: warm reset for a USB 3.0 device is done. A "hot" reset puts the port - * into an enabled state, and the device into the default state. A "warm" reset - * also resets the link, forcing the device through the link training sequence. - * SW can also look at the Port Reset register to see when warm reset is done. - */ -#define PORT_WRC (1 << 19) -/* true: over-current change */ -#define PORT_OCC (1 << 20) -/* true: reset change - 1 to 0 transition of PORT_RESET */ -#define PORT_RC (1 << 21) -/* port link status change - set on some port link state transitions: - * Transition Reason - * ------------------------------------------------------------------------------ - * - U3 to Resume Wakeup signaling from a device - * - Resume to Recovery to U0 USB 3.0 device resume - * - Resume to U0 USB 2.0 device resume - * - U3 to Recovery to U0 Software resume of USB 3.0 device complete - * - U3 to U0 Software resume of USB 2.0 device complete - * - U2 to U0 L1 resume of USB 2.1 device complete - * - U0 to U0 (???) L1 entry rejection by USB 2.1 device - * - U0 to disabled L1 entry error with USB 2.1 device - * - Any state to inactive Error on USB 3.0 port - */ -#define PORT_PLC (1 << 22) -/* port configure error change - port failed to configure its link partner */ -#define PORT_CEC (1 << 23) -#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ - PORT_RC | PORT_PLC | PORT_CEC) - - -/* Cold Attach Status - xHC can set this bit to report device attached during - * Sx state. Warm port reset should be perfomed to clear this bit and move port - * to connected state. - */ -#define PORT_CAS (1 << 24) -/* wake on connect (enable) */ -#define PORT_WKCONN_E (1 << 25) -/* wake on disconnect (enable) */ -#define PORT_WKDISC_E (1 << 26) -/* wake on over-current (enable) */ -#define PORT_WKOC_E (1 << 27) -/* bits 28:29 reserved */ -/* true: device is non-removable - for USB 3.0 roothub emulation */ -#define PORT_DEV_REMOVE (1 << 30) -/* Initiate a warm port reset - complete when PORT_WRC is '1' */ -#define PORT_WR (1 << 31) - -/* We mark duplicate entries with -1 */ -#define DUPLICATE_ENTRY ((u8)(-1)) - -/* Port Power Management Status and Control - port_power_base bitmasks */ -/* Inactivity timer value for transitions into U1, in microseconds. - * Timeout can be up to 127us. 0xFF means an infinite timeout. - */ -#define PORT_U1_TIMEOUT(p) ((p) & 0xff) -#define PORT_U1_TIMEOUT_MASK 0xff -/* Inactivity timer value for transitions into U2 */ -#define PORT_U2_TIMEOUT(p) (((p) & 0xff) << 8) -#define PORT_U2_TIMEOUT_MASK (0xff << 8) -/* Bits 24:31 for port testing */ - -/* USB2 Protocol PORTSPMSC */ -#define PORT_L1S_MASK 7 -#define PORT_L1S_SUCCESS 1 -#define PORT_RWE (1 << 3) -#define PORT_HIRD(p) (((p) & 0xf) << 4) -#define PORT_HIRD_MASK (0xf << 4) -#define PORT_L1DS_MASK (0xff << 8) -#define PORT_L1DS(p) (((p) & 0xff) << 8) -#define PORT_HLE (1 << 16) -#define PORT_TEST_MODE_SHIFT 28 - -/* USB3 Protocol PORTLI Port Link Information */ -#define PORT_RX_LANES(p) (((p) >> 16) & 0xf) -#define PORT_TX_LANES(p) (((p) >> 20) & 0xf) - -/* USB2 Protocol PORTHLPMC */ -#define PORT_HIRDM(p)((p) & 3) -#define PORT_L1_TIMEOUT(p)(((p) & 0xff) << 2) -#define PORT_BESLD(p)(((p) & 0xf) << 10) - -/* use 512 microseconds as USB2 LPM L1 default timeout. */ -#define XHCI_L1_TIMEOUT 512 - -/* Set default HIRD/BESL value to 4 (350/400us) for USB2 L1 LPM resume latency. - * Safe to use with mixed HIRD and BESL systems (host and device) and is used - * by other operating systems. - * - * XHCI 1.0 errata 8/14/12 Table 13 notes: - * "Software should choose xHC BESL/BESLD field values that do not violate a - * device's resume latency requirements, - * e.g. not program values > '4' if BLC = '1' and a HIRD device is attached, - * or not program values < '4' if BLC = '0' and a BESL device is attached. - */ -#define XHCI_DEFAULT_BESL 4 - -/* - * USB3 specification define a 360ms tPollingLFPSTiemout for USB3 ports - * to complete link training. usually link trainig completes much faster - * so check status 10 times with 36ms sleep in places we need to wait for - * polling to complete. - */ -#define XHCI_PORT_POLLING_LFPS_TIME 36 - /** * struct xhci_intr_reg - Interrupt Register Set * @irq_pending: IMAN - Interrupt Management Register. Used to enable From 4fad7370086797afe6471493e3a5f36add8c48a7 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 11 Oct 2024 13:53:24 +0300 Subject: [PATCH 106/207] usb: dwc3: core: Fix system suspend on TI AM62 platforms [ Upstream commit 705e3ce37bccdf2ed6f848356ff355f480d51a91 ] Since commit 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init"), system suspend is broken on AM62 TI platforms. Before that commit, both DWC3_GUSB3PIPECTL_SUSPHY and DWC3_GUSB2PHYCFG_SUSPHY bits (hence forth called 2 SUSPHY bits) were being set during core initialization and even during core re-initialization after a system suspend/resume. These bits are required to be set for system suspend/resume to work correctly on AM62 platforms. Since that commit, the 2 SUSPHY bits are not set for DEVICE/OTG mode if gadget driver is not loaded and started. For Host mode, the 2 SUSPHY bits are set before the first system suspend but get cleared at system resume during core re-init and are never set again. This patch resovles these two issues by ensuring the 2 SUSPHY bits are set before system suspend and restored to the original state during system resume. Cc: stable@vger.kernel.org # v6.9+ Fixes: 6d735722063a ("usb: dwc3: core: Prevent phy suspend during init") Link: https://lore.kernel.org/all/1519dbe7-73b6-4afc-bfe3-23f4f75d772f@kernel.org/ Signed-off-by: Roger Quadros Acked-by: Thinh Nguyen Tested-by: Markus Schneider-Pargmann Reviewed-by: Dhruva Gole Link: https://lore.kernel.org/r/20241011-am62-lpm-usb-v3-1-562d445625b5@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/core.c | 19 +++++++++++++++++++ drivers/usb/dwc3/core.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index af851e4e8c8a..8cbe19574bbc 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2106,6 +2106,11 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { u32 reg; + dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & + DWC3_GUSB2PHYCFG_SUSPHY) || + (dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)) & + DWC3_GUSB3PIPECTL_SUSPHY); + switch (dwc->current_dr_role) { case DWC3_GCTL_PRTCAP_DEVICE: if (pm_runtime_suspended(dwc->dev)) @@ -2153,6 +2158,15 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; } + if (!PMSG_IS_AUTO(msg)) { + /* + * TI AM62 platform requires SUSPHY to be + * enabled for system suspend to work. + */ + if (!dwc->susphy_state) + dwc3_enable_susphy(dwc, true); + } + return 0; } @@ -2215,6 +2229,11 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) break; } + if (!PMSG_IS_AUTO(msg)) { + /* restore SUSPHY state to that before system suspend. */ + dwc3_enable_susphy(dwc, dwc->susphy_state); + } + return 0; } diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 420753205faf..3325796f3cb4 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1127,6 +1127,8 @@ struct dwc3_scratchpad_array { * @sys_wakeup: set if the device may do system wakeup. * @wakeup_configured: set if the device is configured for remote wakeup. * @suspended: set to track suspend event due to U3/L2. + * @susphy_state: state of DWC3_GUSB2PHYCFG_SUSPHY + DWC3_GUSB3PIPECTL_SUSPHY + * before PM suspend. * @imod_interval: set the interrupt moderation interval in 250ns * increments or 0 to disable. * @max_cfg_eps: current max number of IN eps used across all USB configs. @@ -1351,6 +1353,7 @@ struct dwc3 { unsigned sys_wakeup:1; unsigned wakeup_configured:1; unsigned suspended:1; + unsigned susphy_state:1; u16 imod_interval; From 5868fa7ebe664bdd3e84b1dda4820ddb02619e1f Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 4 Mar 2024 12:40:16 +0900 Subject: [PATCH 107/207] tracing/fprobe-event: cleanup: Fix a wrong comment in fprobe event [ Upstream commit 7e37b6bc3cc096e24709908076807bb9c3cf0d38 ] Despite the fprobe event, "Kretprobe" was commented. So fix it. Link: https://lore.kernel.org/all/170952361630.229804.10832200172327797860.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin --- kernel/trace/trace_fprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 7d2ddbcfa377..3ccef4d82235 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -210,7 +210,7 @@ fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, } NOKPROBE_SYMBOL(fentry_trace_func); -/* Kretprobe handler */ +/* function exit handler */ static nokprobe_inline void __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, unsigned long ret_ip, struct pt_regs *regs, From e96b42d22f66982b84e31b6935a59d74d9a2d848 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 4 Mar 2024 12:40:36 +0900 Subject: [PATCH 108/207] tracing/probes: cleanup: Set trace_probe::nr_args at trace_probe_init [ Upstream commit 035ba76014c096316fa809a46ce0a1b9af1cde0d ] Instead of incrementing the trace_probe::nr_args, init it at trace_probe_init(). Without this change, there is no way to get the number of trace_probe arguments while parsing it. This is a cleanup, so the behavior is not changed. Link: https://lore.kernel.org/all/170952363585.229804.13060759900346411951.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin --- kernel/trace/trace_eprobe.c | 2 +- kernel/trace/trace_fprobe.c | 2 +- kernel/trace/trace_kprobe.c | 2 +- kernel/trace/trace_probe.c | 10 ++++++---- kernel/trace/trace_probe.h | 2 +- kernel/trace/trace_uprobe.c | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 72714cbf475c..42b76f02e57a 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -220,7 +220,7 @@ static struct trace_eprobe *alloc_event_probe(const char *group, if (!ep->event_system) goto error; - ret = trace_probe_init(&ep->tp, this_event, group, false); + ret = trace_probe_init(&ep->tp, this_event, group, false, nargs); if (ret < 0) goto error; diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 3ccef4d82235..5109650b0d82 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -389,7 +389,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, tf->tpoint = tpoint; tf->fp.nr_maxactive = maxactive; - ret = trace_probe_init(&tf->tp, event, group, false); + ret = trace_probe_init(&tf->tp, event, group, false, nargs); if (ret < 0) goto error; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 52f8b537dd0a..d1a7c876e419 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -290,7 +290,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, INIT_HLIST_NODE(&tk->rp.kp.hlist); INIT_LIST_HEAD(&tk->rp.kp.list); - ret = trace_probe_init(&tk->tp, event, group, false); + ret = trace_probe_init(&tk->tp, event, group, false, nargs); if (ret < 0) goto error; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index ae162ba36a48..5d6c6c105f3c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1383,9 +1383,6 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg, struct probe_arg *parg = &tp->args[i]; const char *body; - /* Increment count for freeing args in error case */ - tp->nr_args++; - body = strchr(arg, '='); if (body) { if (body - arg > MAX_ARG_NAME_LEN) { @@ -1770,7 +1767,7 @@ void trace_probe_cleanup(struct trace_probe *tp) } int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group, bool alloc_filter) + const char *group, bool alloc_filter, int nargs) { struct trace_event_call *call; size_t size = sizeof(struct trace_probe_event); @@ -1806,6 +1803,11 @@ int trace_probe_init(struct trace_probe *tp, const char *event, goto error; } + tp->nr_args = nargs; + /* Make sure pointers in args[] are NULL */ + if (nargs) + memset(tp->args, 0, sizeof(tp->args[0]) * nargs); + return 0; error: diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index c1877d018269..ed8d1052f8a7 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -338,7 +338,7 @@ static inline bool trace_probe_has_single_file(struct trace_probe *tp) } int trace_probe_init(struct trace_probe *tp, const char *event, - const char *group, bool alloc_filter); + const char *group, bool alloc_filter, int nargs); void trace_probe_cleanup(struct trace_probe *tp); int trace_probe_append(struct trace_probe *tp, struct trace_probe *to); void trace_probe_unlink(struct trace_probe *tp); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 99c051de412a..49d9af6d446e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -337,7 +337,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) if (!tu) return ERR_PTR(-ENOMEM); - ret = trace_probe_init(&tu->tp, event, group, true); + ret = trace_probe_init(&tu->tp, event, group, true, nargs); if (ret < 0) goto error; From 91f62703146a4b6161ef12d4614164657898fc2c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 4 Mar 2024 12:40:55 +0900 Subject: [PATCH 109/207] tracing/probes: Support $argN in return probe (kprobe and fprobe) [ Upstream commit 25f00e40ce7953db197af3a59233711d154c9d80 ] Support accessing $argN in the return probe events. This will help users to record entry data in function return (exit) event for simplfing the function entry/exit information in one event, and record the result values (e.g. allocated object/initialized object) at function exit. For example, if we have a function `int init_foo(struct foo *obj, int param)` sometimes we want to check how `obj` is initialized. In such case, we can define a new return event like below; # echo 'r init_foo retval=$retval param=$arg2 field1=+0($arg1)' >> kprobe_events Thus it records the function parameter `param` and its result `obj->field1` (the dereference will be done in the function exit timing) value at once. This also support fprobe, BTF args and'$arg*'. So if CONFIG_DEBUG_INFO_BTF is enabled, we can trace both function parameters and the return value by following command. # echo 'f target_function%return $arg* $retval' >> dynamic_events Link: https://lore.kernel.org/all/170952365552.229804.224112990211602895.stgit@devnote2/ Signed-off-by: Masami Hiramatsu (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 1 + kernel/trace/trace_eprobe.c | 6 +- kernel/trace/trace_fprobe.c | 55 ++++-- kernel/trace/trace_kprobe.c | 56 +++++- kernel/trace/trace_probe.c | 177 +++++++++++++++--- kernel/trace/trace_probe.h | 28 ++- kernel/trace/trace_probe_tmpl.h | 10 +- kernel/trace/trace_uprobe.c | 12 +- .../test.d/dynevent/fprobe_syntax_errors.tc | 4 + .../test.d/kprobe/kprobe_syntax_errors.tc | 2 + 10 files changed, 289 insertions(+), 62 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4f93d57cc029..ecd869ed2767 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5757,6 +5757,7 @@ static const char readme_msg[] = "\t $stack, $stack, $retval, $comm,\n" #endif "\t +|-[u](), \\imm-value, \\\"imm-string\"\n" + "\t kernel return probes support: $retval, $arg, $comm\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b@/, ustring,\n" "\t symstr, \\[\\]\n" diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 42b76f02e57a..b03bc30f85ee 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -390,8 +390,8 @@ static int get_eprobe_size(struct trace_probe *tp, void *rec) /* Note that we don't verify it, since the code does not come from user space */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { unsigned long val; int ret; @@ -438,7 +438,7 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) return; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); - store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize); + store_trace_args(&entry[1], &edata->ep->tp, rec, NULL, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 5109650b0d82..4f4280815522 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -4,6 +4,7 @@ * Copyright (C) 2022 Google LLC. */ #define pr_fmt(fmt) "trace_fprobe: " fmt +#include #include #include @@ -129,8 +130,8 @@ static bool trace_fprobe_is_registered(struct trace_fprobe *tf) * from user space. */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; @@ -152,6 +153,9 @@ retry: case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); break; + case FETCH_OP_EDATA: + val = *(unsigned long *)((unsigned long)edata + code->offset); + break; #endif case FETCH_NOP_SYMBOL: /* Ignore a place holder */ code++; @@ -184,7 +188,7 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, NULL); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tf->tp.size + dsize); @@ -194,7 +198,7 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, fbuffer.regs = regs; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->ip = entry_ip; - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } @@ -211,10 +215,23 @@ fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, NOKPROBE_SYMBOL(fentry_trace_func); /* function exit handler */ +static int trace_fprobe_entry_handler(struct fprobe *fp, unsigned long entry_ip, + unsigned long ret_ip, struct pt_regs *regs, + void *entry_data) +{ + struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); + + if (tf->tp.entry_arg) + store_trace_entry_data(entry_data, &tf->tp, regs); + + return 0; +} +NOKPROBE_SYMBOL(trace_fprobe_entry_handler) + static nokprobe_inline void __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, unsigned long ret_ip, struct pt_regs *regs, - struct trace_event_file *trace_file) + void *entry_data, struct trace_event_file *trace_file) { struct fexit_trace_entry_head *entry; struct trace_event_buffer fbuffer; @@ -227,7 +244,7 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, entry_data); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tf->tp.size + dsize); @@ -238,19 +255,19 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = entry_ip; entry->ret_ip = ret_ip; - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } static void fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs) + unsigned long ret_ip, struct pt_regs *regs, void *entry_data) { struct event_file_link *link; trace_probe_for_each_link_rcu(link, &tf->tp) - __fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file); + __fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data, link->file); } NOKPROBE_SYMBOL(fexit_trace_func); @@ -269,7 +286,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, if (hlist_empty(head)) return 0; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, NULL); __size = sizeof(*entry) + tf->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -280,7 +297,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, entry->ip = entry_ip; memset(&entry[1], 0, dsize); - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); return 0; @@ -289,7 +306,8 @@ NOKPROBE_SYMBOL(fentry_perf_func); static void fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs) + unsigned long ret_ip, struct pt_regs *regs, + void *entry_data) { struct trace_event_call *call = trace_probe_event_call(&tf->tp); struct fexit_trace_entry_head *entry; @@ -301,7 +319,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, if (hlist_empty(head)) return; - dsize = __get_data_size(&tf->tp, regs); + dsize = __get_data_size(&tf->tp, regs, entry_data); __size = sizeof(*entry) + tf->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -312,7 +330,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip, entry->func = entry_ip; entry->ret_ip = ret_ip; - store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); } @@ -343,10 +361,10 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip, struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE)) - fexit_trace_func(tf, entry_ip, ret_ip, regs); + fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) - fexit_perf_func(tf, entry_ip, ret_ip, regs); + fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data); #endif } NOKPROBE_SYMBOL(fexit_dispatcher); @@ -1109,6 +1127,11 @@ static int __trace_fprobe_create(int argc, const char *argv[]) goto error; /* This can be -ENOMEM */ } + if (is_return && tf->tp.entry_arg) { + tf->fp.entry_handler = trace_fprobe_entry_handler; + tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp); + } + ret = traceprobe_set_print_fmt(&tf->tp, is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL); if (ret < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d1a7c876e419..32c617123f37 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -740,6 +740,9 @@ static unsigned int number_of_same_symbols(char *func_name) return ctx.count; } +static int trace_kprobe_entry_handler(struct kretprobe_instance *ri, + struct pt_regs *regs); + static int __trace_kprobe_create(int argc, const char *argv[]) { /* @@ -948,6 +951,11 @@ static int __trace_kprobe_create(int argc, const char *argv[]) if (ret) goto error; /* This can be -ENOMEM */ } + /* entry handler for kretprobe */ + if (is_return && tk->tp.entry_arg) { + tk->rp.entry_handler = trace_kprobe_entry_handler; + tk->rp.data_size = traceprobe_get_entry_data_size(&tk->tp); + } ptype = is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; ret = traceprobe_set_print_fmt(&tk->tp, ptype); @@ -1303,8 +1311,8 @@ static const struct file_operations kprobe_profile_ops = { /* Note that we don't verify it, since the code does not come from user space */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; @@ -1329,6 +1337,9 @@ retry: case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); break; + case FETCH_OP_EDATA: + val = *(unsigned long *)((unsigned long)edata + code->offset); + break; #endif case FETCH_NOP_SYMBOL: /* Ignore a place holder */ code++; @@ -1359,7 +1370,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, NULL); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tk->tp.size + dsize); @@ -1368,7 +1379,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, fbuffer.regs = regs; entry->ip = (unsigned long)tk->rp.kp.addr; - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, NULL, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } @@ -1384,6 +1395,31 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) NOKPROBE_SYMBOL(kprobe_trace_func); /* Kretprobe handler */ + +static int trace_kprobe_entry_handler(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct kretprobe *rp = get_kretprobe(ri); + struct trace_kprobe *tk; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when + * the kretprobe is unregister on another CPU between kretprobe's + * trampoline_handler and this function. + */ + if (unlikely(!rp)) + return -ENOENT; + + tk = container_of(rp, struct trace_kprobe, rp); + + /* store argument values into ri->data as entry data */ + if (tk->tp.entry_arg) + store_trace_entry_data(ri->data, &tk->tp, regs); + + return 0; +} + + static nokprobe_inline void __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs, @@ -1399,7 +1435,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, ri->data); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + tk->tp.size + dsize); @@ -1409,7 +1445,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, fbuffer.regs = regs; entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = get_kretprobe_retaddr(ri); - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, ri->data, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); } @@ -1557,7 +1593,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) if (hlist_empty(head)) return 0; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, NULL); __size = sizeof(*entry) + tk->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1568,7 +1604,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, NULL, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); return 0; @@ -1593,7 +1629,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (hlist_empty(head)) return; - dsize = __get_data_size(&tk->tp, regs); + dsize = __get_data_size(&tk->tp, regs, ri->data); __size = sizeof(*entry) + tk->tp.size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1604,7 +1640,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->func = (unsigned long)tk->rp.kp.addr; entry->ret_ip = get_kretprobe_retaddr(ri); - store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); + store_trace_args(&entry[1], &tk->tp, regs, ri->data, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 5d6c6c105f3c..58a6275c7f49 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -598,6 +598,8 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type, return 0; } +static int __store_entry_arg(struct trace_probe *tp, int argnum); + static int parse_btf_arg(char *varname, struct fetch_insn **pcode, struct fetch_insn *end, struct traceprobe_parse_context *ctx) @@ -622,11 +624,7 @@ static int parse_btf_arg(char *varname, return -EOPNOTSUPP; } - if (ctx->flags & TPARG_FL_RETURN) { - if (strcmp(varname, "$retval") != 0) { - trace_probe_log_err(ctx->offset, NO_BTFARG); - return -ENOENT; - } + if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) { code->op = FETCH_OP_RETVAL; /* Check whether the function return type is not void */ if (query_btf_context(ctx) == 0) { @@ -658,11 +656,21 @@ static int parse_btf_arg(char *varname, const char *name = btf_name_by_offset(ctx->btf, params[i].name_off); if (name && !strcmp(name, varname)) { - code->op = FETCH_OP_ARG; - if (ctx->flags & TPARG_FL_TPOINT) - code->param = i + 1; - else - code->param = i; + if (tparg_is_function_entry(ctx->flags)) { + code->op = FETCH_OP_ARG; + if (ctx->flags & TPARG_FL_TPOINT) + code->param = i + 1; + else + code->param = i; + } else if (tparg_is_function_return(ctx->flags)) { + code->op = FETCH_OP_EDATA; + ret = __store_entry_arg(ctx->tp, i); + if (ret < 0) { + /* internal error */ + return ret; + } + code->offset = ret; + } tid = params[i].type; goto found; } @@ -759,6 +767,110 @@ static int check_prepare_btf_string_fetch(char *typename, #endif +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API + +static int __store_entry_arg(struct trace_probe *tp, int argnum) +{ + struct probe_entry_arg *earg = tp->entry_arg; + bool match = false; + int i, offset; + + if (!earg) { + earg = kzalloc(sizeof(*tp->entry_arg), GFP_KERNEL); + if (!earg) + return -ENOMEM; + earg->size = 2 * tp->nr_args + 1; + earg->code = kcalloc(earg->size, sizeof(struct fetch_insn), + GFP_KERNEL); + if (!earg->code) { + kfree(earg); + return -ENOMEM; + } + /* Fill the code buffer with 'end' to simplify it */ + for (i = 0; i < earg->size; i++) + earg->code[i].op = FETCH_OP_END; + tp->entry_arg = earg; + } + + offset = 0; + for (i = 0; i < earg->size - 1; i++) { + switch (earg->code[i].op) { + case FETCH_OP_END: + earg->code[i].op = FETCH_OP_ARG; + earg->code[i].param = argnum; + earg->code[i + 1].op = FETCH_OP_ST_EDATA; + earg->code[i + 1].offset = offset; + return offset; + case FETCH_OP_ARG: + match = (earg->code[i].param == argnum); + break; + case FETCH_OP_ST_EDATA: + offset = earg->code[i].offset; + if (match) + return offset; + offset += sizeof(unsigned long); + break; + default: + break; + } + } + return -ENOSPC; +} + +int traceprobe_get_entry_data_size(struct trace_probe *tp) +{ + struct probe_entry_arg *earg = tp->entry_arg; + int i, size = 0; + + if (!earg) + return 0; + + for (i = 0; i < earg->size; i++) { + switch (earg->code[i].op) { + case FETCH_OP_END: + goto out; + case FETCH_OP_ST_EDATA: + size = earg->code[i].offset + sizeof(unsigned long); + break; + default: + break; + } + } +out: + return size; +} + +void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs) +{ + struct probe_entry_arg *earg = tp->entry_arg; + unsigned long val; + int i; + + if (!earg) + return; + + for (i = 0; i < earg->size; i++) { + struct fetch_insn *code = &earg->code[i]; + + switch (code->op) { + case FETCH_OP_ARG: + val = regs_get_kernel_argument(regs, code->param); + break; + case FETCH_OP_ST_EDATA: + *(unsigned long *)((unsigned long)edata + code->offset) = val; + break; + case FETCH_OP_END: + goto end; + default: + break; + } + } +end: + return; +} +NOKPROBE_SYMBOL(store_trace_entry_data) +#endif + #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) /* Parse $vars. @orig_arg points '$', which syncs to @ctx->offset */ @@ -834,7 +946,7 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t, #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API len = str_has_prefix(arg, "arg"); - if (len && tparg_is_function_entry(ctx->flags)) { + if (len) { ret = kstrtoul(arg + len, 10, ¶m); if (ret) goto inval; @@ -843,15 +955,29 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t, err = TP_ERR_BAD_ARG_NUM; goto inval; } + param--; /* argN starts from 1, but internal arg[N] starts from 0 */ - code->op = FETCH_OP_ARG; - code->param = (unsigned int)param - 1; - /* - * The tracepoint probe will probe a stub function, and the - * first parameter of the stub is a dummy and should be ignored. - */ - if (ctx->flags & TPARG_FL_TPOINT) - code->param++; + if (tparg_is_function_entry(ctx->flags)) { + code->op = FETCH_OP_ARG; + code->param = (unsigned int)param; + /* + * The tracepoint probe will probe a stub function, and the + * first parameter of the stub is a dummy and should be ignored. + */ + if (ctx->flags & TPARG_FL_TPOINT) + code->param++; + } else if (tparg_is_function_return(ctx->flags)) { + /* function entry argument access from return probe */ + ret = __store_entry_arg(ctx->tp, param); + if (ret < 0) /* This error should be an internal error */ + return ret; + + code->op = FETCH_OP_EDATA; + code->offset = ret; + } else { + err = TP_ERR_NOFENTRY_ARGS; + goto inval; + } return 0; } #endif @@ -1041,7 +1167,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type, break; default: if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */ - if (!tparg_is_function_entry(ctx->flags)) { + if (!tparg_is_function_entry(ctx->flags) && + !tparg_is_function_return(ctx->flags)) { trace_probe_log_err(ctx->offset, NOSUP_BTFARG); return -EINVAL; } @@ -1383,6 +1510,7 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg, struct probe_arg *parg = &tp->args[i]; const char *body; + ctx->tp = tp; body = strchr(arg, '='); if (body) { if (body - arg > MAX_ARG_NAME_LEN) { @@ -1439,7 +1567,8 @@ static int argv_has_var_arg(int argc, const char *argv[], int *args_idx, if (str_has_prefix(argv[i], "$arg")) { trace_probe_log_set_index(i + 2); - if (!tparg_is_function_entry(ctx->flags)) { + if (!tparg_is_function_entry(ctx->flags) && + !tparg_is_function_return(ctx->flags)) { trace_probe_log_err(0, NOFENTRY_ARGS); return -EINVAL; } @@ -1762,6 +1891,12 @@ void trace_probe_cleanup(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); + if (tp->entry_arg) { + kfree(tp->entry_arg->code); + kfree(tp->entry_arg); + tp->entry_arg = NULL; + } + if (tp->event) trace_probe_unlink(tp); } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index ed8d1052f8a7..cef3a50628a3 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -92,6 +92,7 @@ enum fetch_op { FETCH_OP_ARG, /* Function argument : .param */ FETCH_OP_FOFFS, /* File offset: .immediate */ FETCH_OP_DATA, /* Allocated data: .data */ + FETCH_OP_EDATA, /* Entry data: .offset */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ FETCH_OP_UDEREF, /* User-space Dereference: .offset */ @@ -102,6 +103,7 @@ enum fetch_op { FETCH_OP_ST_STRING, /* String: .offset, .size */ FETCH_OP_ST_USTRING, /* User String: .offset, .size */ FETCH_OP_ST_SYMSTR, /* Kernel Symbol String: .offset, .size */ + FETCH_OP_ST_EDATA, /* Store Entry Data: .offset */ // Stage 4 (modify) op FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ // Stage 5 (loop) op @@ -232,6 +234,11 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; +struct probe_entry_arg { + struct fetch_insn *code; + unsigned int size; /* The entry data size */ +}; + struct trace_uprobe_filter { rwlock_t rwlock; int nr_systemwide; @@ -253,6 +260,7 @@ struct trace_probe { struct trace_probe_event *event; ssize_t size; /* trace entry size */ unsigned int nr_args; + struct probe_entry_arg *entry_arg; /* This is only for return probe */ struct probe_arg args[]; }; @@ -355,6 +363,18 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args, u8 *data, void *field); +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API +int traceprobe_get_entry_data_size(struct trace_probe *tp); +/* This is a runtime function to store entry data */ +void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs); +#else /* !CONFIG_HAVE_FUNCTION_ARG_ACCESS_API */ +static inline int traceprobe_get_entry_data_size(struct trace_probe *tp) +{ + return 0; +} +#define store_trace_entry_data(edata, tp, regs) do { } while (0) +#endif + #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) #define trace_probe_for_each_link_rcu(pos, tp) \ @@ -381,6 +401,11 @@ static inline bool tparg_is_function_entry(unsigned int flags) return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY); } +static inline bool tparg_is_function_return(unsigned int flags) +{ + return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_RETURN); +} + struct traceprobe_parse_context { struct trace_event_call *event; /* BTF related parameters */ @@ -392,6 +417,7 @@ struct traceprobe_parse_context { const struct btf_type *last_type; /* Saved type */ u32 last_bitoffs; /* Saved bitoffs */ u32 last_bitsize; /* Saved bitsize */ + struct trace_probe *tp; unsigned int flags; int offset; }; @@ -506,7 +532,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(NO_BTFARG, "This variable is not found at this probe point"),\ C(NO_BTF_ENTRY, "No BTF entry for this probe point"), \ C(BAD_VAR_ARGS, "$arg* must be an independent parameter without name etc."),\ - C(NOFENTRY_ARGS, "$arg* can be used only on function entry"), \ + C(NOFENTRY_ARGS, "$arg* can be used only on function entry or exit"), \ C(DOUBLE_ARGS, "$arg* can be used only once in the parameters"), \ C(ARGS_2LONG, "$arg* failed because the argument list is too long"), \ C(ARGIDX_2BIG, "$argN index is too big"), \ diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 3935b347f874..2caf0d2afb32 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -54,7 +54,7 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf) * If dest is NULL, don't store result and return required dynamic data size. */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, void *dest, void *base); static nokprobe_inline int fetch_store_strlen(unsigned long addr); static nokprobe_inline int @@ -232,7 +232,7 @@ array: /* Sum up total data length for dynamic arrays (strings) */ static nokprobe_inline int -__get_data_size(struct trace_probe *tp, struct pt_regs *regs) +__get_data_size(struct trace_probe *tp, struct pt_regs *regs, void *edata) { struct probe_arg *arg; int i, len, ret = 0; @@ -240,7 +240,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs) for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; if (unlikely(arg->dynamic)) { - len = process_fetch_insn(arg->code, regs, NULL, NULL); + len = process_fetch_insn(arg->code, regs, edata, NULL, NULL); if (len > 0) ret += len; } @@ -251,7 +251,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs) /* Store the value of each argument */ static nokprobe_inline void -store_trace_args(void *data, struct trace_probe *tp, void *rec, +store_trace_args(void *data, struct trace_probe *tp, void *rec, void *edata, int header_size, int maxlen) { struct probe_arg *arg; @@ -266,7 +266,7 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec, /* Point the dynamic data area if needed */ if (unlikely(arg->dynamic)) *dl = make_data_loc(maxlen, dyndata - base); - ret = process_fetch_insn(arg->code, rec, dl, base); + ret = process_fetch_insn(arg->code, rec, edata, dl, base); if (arg->dynamic && likely(ret > 0)) { dyndata += ret; maxlen -= ret; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 49d9af6d446e..78d76d74f45b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -211,8 +211,8 @@ static unsigned long translate_user_vaddr(unsigned long file_offset) /* Note that we don't verify it, since the code does not come from user space */ static int -process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, - void *base) +process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, + void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; @@ -1490,11 +1490,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs); + dsize = __get_data_size(&tu->tp, regs, NULL); esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) ret |= uprobe_trace_func(tu, regs, ucb, dsize); @@ -1525,11 +1525,11 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs); + dsize = __get_data_size(&tu->tp, regs, NULL); esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) uretprobe_trace_func(tu, func, regs, ucb, dsize); diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc index 20e42c030095..61877d166451 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -34,7 +34,9 @@ check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM check_error 'f vfs_read ^$arg10000' # BAD_ARG_NUM +if !grep -q 'kernel return probes support:' README; then check_error 'f vfs_read $retval ^$arg1' # BAD_VAR +fi check_error 'f vfs_read ^$none_var' # BAD_VAR check_error 'f vfs_read ^'$REG # BAD_VAR @@ -99,7 +101,9 @@ if grep -q "" README; then check_error 'f vfs_read args=^$arg*' # BAD_VAR_ARGS check_error 'f vfs_read +0(^$arg*)' # BAD_VAR_ARGS check_error 'f vfs_read $arg* ^$arg*' # DOUBLE_ARGS +if !grep -q 'kernel return probes support:' README; then check_error 'f vfs_read%return ^$arg*' # NOFENTRY_ARGS +fi check_error 'f vfs_read ^hoge' # NO_BTFARG check_error 'f kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) check_error 'f kfree%return ^$retval' # NO_RETVAL diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 65fbb26fd58c..a16c6a6f6055 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -108,7 +108,9 @@ if grep -q "" README; then check_error 'p vfs_read args=^$arg*' # BAD_VAR_ARGS check_error 'p vfs_read +0(^$arg*)' # BAD_VAR_ARGS check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS +if !grep -q 'kernel return probes support:' README; then check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS +fi check_error 'p vfs_read+8 ^$arg*' # NOFENTRY_ARGS check_error 'p vfs_read ^hoge' # NO_BTFARG check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) From 1c7fa34863c7bc46f7618268b804d92a3519446b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Mar 2024 11:17:26 -0700 Subject: [PATCH 110/207] uprobes: encapsulate preparation of uprobe args buffer [ Upstream commit 3eaea21b4d27cff0017c20549aeb53034c58fc23 ] Move the logic of fetching temporary per-CPU uprobe buffer and storing uprobes args into it to a new helper function. Store data size as part of this buffer, simplifying interfaces a bit, as now we only pass single uprobe_cpu_buffer reference around, instead of pointer + dsize. This logic was duplicated across uprobe_dispatcher and uretprobe_dispatcher, and now will be centralized. All this is also in preparation to make this uprobe_cpu_buffer handling logic optional in the next patch. Link: https://lore.kernel.org/all/20240318181728.2795838-2-andrii@kernel.org/ [Masami: update for v6.9-rc3 kernel] Signed-off-by: Andrii Nakryiko Reviewed-by: Jiri Olsa Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin --- kernel/trace/trace_uprobe.c | 78 +++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 78d76d74f45b..58506c9632ea 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -854,6 +854,7 @@ static const struct file_operations uprobe_profile_ops = { struct uprobe_cpu_buffer { struct mutex mutex; void *buf; + int dsize; }; static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; static int uprobe_buffer_refcnt; @@ -943,9 +944,26 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) mutex_unlock(&ucb->mutex); } +static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, + struct pt_regs *regs) +{ + struct uprobe_cpu_buffer *ucb; + int dsize, esize; + + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(&tu->tp, regs, NULL); + + ucb = uprobe_buffer_get(); + ucb->dsize = tu->tp.size + dsize; + + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); + + return ucb; +} + static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize, + struct uprobe_cpu_buffer *ucb, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; @@ -956,14 +974,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, WARN_ON(call != trace_file->event_call); - if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) + if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) return; if (trace_trigger_soft_disabled(trace_file)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - size = esize + tu->tp.size + dsize; + size = esize + ucb->dsize; entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); if (!entry) return; @@ -977,14 +995,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - memcpy(data, ucb->buf, tu->tp.size + dsize); + memcpy(data, ucb->buf, ucb->dsize); trace_event_buffer_commit(&fbuffer); } /* uprobe handler */ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { struct event_file_link *link; @@ -993,7 +1011,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); + __uprobe_trace_func(tu, 0, regs, ucb, link->file); rcu_read_unlock(); return 0; @@ -1001,13 +1019,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { struct event_file_link *link; rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); + __uprobe_trace_func(tu, func, regs, ucb, link->file); rcu_read_unlock(); } @@ -1335,7 +1353,7 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); struct uprobe_trace_entry_head *entry; @@ -1356,7 +1374,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - size = esize + tu->tp.size + dsize; + size = esize + ucb->dsize; size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; @@ -1379,13 +1397,10 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - memcpy(data, ucb->buf, tu->tp.size + dsize); + memcpy(data, ucb->buf, ucb->dsize); - if (size - esize > tu->tp.size + dsize) { - int len = tu->tp.size + dsize; - - memset(data + len, 0, size - esize - len); - } + if (size - esize > ucb->dsize) + memset(data + ucb->dsize, 0, size - esize - ucb->dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); @@ -1395,21 +1410,21 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, /* uprobe profile handler */ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) - __uprobe_perf_func(tu, 0, regs, ucb, dsize); + __uprobe_perf_func(tu, 0, regs, ucb); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { - __uprobe_perf_func(tu, func, regs, ucb, dsize); + __uprobe_perf_func(tu, func, regs, ucb); } int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, @@ -1475,10 +1490,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb; - int dsize, esize; int ret = 0; - tu = container_of(con, struct trace_uprobe, consumer); tu->nhit++; @@ -1490,18 +1503,14 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs, NULL); - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - - ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); + ucb = prepare_uprobe_buffer(tu, regs); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) - ret |= uprobe_trace_func(tu, regs, ucb, dsize); + ret |= uprobe_trace_func(tu, regs, ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) - ret |= uprobe_perf_func(tu, regs, ucb, dsize); + ret |= uprobe_perf_func(tu, regs, ucb); #endif uprobe_buffer_put(ucb); return ret; @@ -1513,7 +1522,6 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb; - int dsize, esize; tu = container_of(con, struct trace_uprobe, consumer); @@ -1525,18 +1533,14 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs, NULL); - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - - ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); + ucb = prepare_uprobe_buffer(tu, regs); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) - uretprobe_trace_func(tu, func, regs, ucb, dsize); + uretprobe_trace_func(tu, func, regs, ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) - uretprobe_perf_func(tu, func, regs, ucb, dsize); + uretprobe_perf_func(tu, func, regs, ucb); #endif uprobe_buffer_put(ucb); return 0; From f5272c98a4a0e7d4d1718cceb114ab33092cae1c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Mar 2024 11:17:27 -0700 Subject: [PATCH 111/207] uprobes: prepare uprobe args buffer lazily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1b8f85defbc82e2eb8f27c5f6060ea507ad4d5a3 ] uprobe_cpu_buffer and corresponding logic to store uprobe args into it are used for uprobes/uretprobes that are created through tracefs or perf events. BPF is yet another user of uprobe/uretprobe infrastructure, but doesn't need uprobe_cpu_buffer and associated data. For BPF-only use cases this buffer handling and preparation is a pure overhead. At the same time, BPF-only uprobe/uretprobe usage is very common in practice. Also, for a lot of cases applications are very senstivie to performance overheads, as they might be tracing a very high frequency functions like malloc()/free(), so every bit of performance improvement matters. All that is to say that this uprobe_cpu_buffer preparation is an unnecessary overhead that each BPF user of uprobes/uretprobe has to pay. This patch is changing this by making uprobe_cpu_buffer preparation optional. It will happen only if either tracefs-based or perf event-based uprobe/uretprobe consumer is registered for given uprobe/uretprobe. For BPF-only use cases this step will be skipped. We used uprobe/uretprobe benchmark which is part of BPF selftests (see [0]) to estimate the improvements. We have 3 uprobe and 3 uretprobe scenarios, which vary an instruction that is replaced by uprobe: nop (fastest uprobe case), `push rbp` (typical case), and non-simulated `ret` instruction (slowest case). Benchmark thread is constantly calling user space function in a tight loop. User space function has attached BPF uprobe or uretprobe program doing nothing but atomic counter increments to count number of triggering calls. Benchmark emits throughput in millions of executions per second. BEFORE these changes ==================== uprobe-nop : 2.657 ± 0.024M/s uprobe-push : 2.499 ± 0.018M/s uprobe-ret : 1.100 ± 0.006M/s uretprobe-nop : 1.356 ± 0.004M/s uretprobe-push : 1.317 ± 0.019M/s uretprobe-ret : 0.785 ± 0.007M/s AFTER these changes =================== uprobe-nop : 2.732 ± 0.022M/s (+2.8%) uprobe-push : 2.621 ± 0.016M/s (+4.9%) uprobe-ret : 1.105 ± 0.007M/s (+0.5%) uretprobe-nop : 1.396 ± 0.007M/s (+2.9%) uretprobe-push : 1.347 ± 0.008M/s (+2.3%) uretprobe-ret : 0.800 ± 0.006M/s (+1.9) So the improvements on this particular machine seems to be between 2% and 5%. [0] https://github.com/torvalds/linux/blob/master/tools/testing/selftests/bpf/benchs/bench_trigger.c Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/all/20240318181728.2795838-3-andrii@kernel.org/ Signed-off-by: Andrii Nakryiko Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin --- kernel/trace/trace_uprobe.c | 49 +++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 58506c9632ea..6c2ab0e316d6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -941,15 +941,21 @@ static struct uprobe_cpu_buffer *uprobe_buffer_get(void) static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) { + if (!ucb) + return; mutex_unlock(&ucb->mutex); } static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, - struct pt_regs *regs) + struct pt_regs *regs, + struct uprobe_cpu_buffer **ucbp) { struct uprobe_cpu_buffer *ucb; int dsize, esize; + if (*ucbp) + return *ucbp; + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); dsize = __get_data_size(&tu->tp, regs, NULL); @@ -958,22 +964,25 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); + *ucbp = ucb; return ucb; } static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, + struct uprobe_cpu_buffer **ucbp, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; struct trace_event_buffer fbuffer; + struct uprobe_cpu_buffer *ucb; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); WARN_ON(call != trace_file->event_call); + ucb = prepare_uprobe_buffer(tu, regs, ucbp); if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) return; @@ -1002,7 +1011,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, /* uprobe handler */ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb) + struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; @@ -1011,7 +1020,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, 0, regs, ucb, link->file); + __uprobe_trace_func(tu, 0, regs, ucbp, link->file); rcu_read_unlock(); return 0; @@ -1019,13 +1028,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb) + struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, func, regs, ucb, link->file); + __uprobe_trace_func(tu, func, regs, ucbp, link->file); rcu_read_unlock(); } @@ -1353,10 +1362,11 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb) + struct uprobe_cpu_buffer **ucbp) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); struct uprobe_trace_entry_head *entry; + struct uprobe_cpu_buffer *ucb; struct hlist_head *head; void *data; int size, esize; @@ -1374,6 +1384,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + ucb = prepare_uprobe_buffer(tu, regs, ucbp); size = esize + ucb->dsize; size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) @@ -1410,21 +1421,21 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, /* uprobe profile handler */ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb) + struct uprobe_cpu_buffer **ucbp) { if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) - __uprobe_perf_func(tu, 0, regs, ucb); + __uprobe_perf_func(tu, 0, regs, ucbp); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb) + struct uprobe_cpu_buffer **ucbp) { - __uprobe_perf_func(tu, func, regs, ucb); + __uprobe_perf_func(tu, func, regs, ucbp); } int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, @@ -1489,7 +1500,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; - struct uprobe_cpu_buffer *ucb; + struct uprobe_cpu_buffer *ucb = NULL; int ret = 0; tu = container_of(con, struct trace_uprobe, consumer); @@ -1503,14 +1514,12 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - ucb = prepare_uprobe_buffer(tu, regs); - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) - ret |= uprobe_trace_func(tu, regs, ucb); + ret |= uprobe_trace_func(tu, regs, &ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) - ret |= uprobe_perf_func(tu, regs, ucb); + ret |= uprobe_perf_func(tu, regs, &ucb); #endif uprobe_buffer_put(ucb); return ret; @@ -1521,7 +1530,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; - struct uprobe_cpu_buffer *ucb; + struct uprobe_cpu_buffer *ucb = NULL; tu = container_of(con, struct trace_uprobe, consumer); @@ -1533,14 +1542,12 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - ucb = prepare_uprobe_buffer(tu, regs); - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) - uretprobe_trace_func(tu, func, regs, ucb); + uretprobe_trace_func(tu, func, regs, &ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) - uretprobe_perf_func(tu, func, regs, ucb); + uretprobe_perf_func(tu, func, regs, &ucb); #endif uprobe_buffer_put(ucb); return 0; From f0a7ea54f3dc7ae2dd55e54918b04304dd28d986 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 20 May 2024 22:30:17 -0700 Subject: [PATCH 112/207] uprobes: prevent mutex_lock() under rcu_read_lock() [ Upstream commit 699646734ab51bf5b1cd4a7a30c20074f6e74f6e ] Recent changes made uprobe_cpu_buffer preparation lazy, and moved it deeper into __uprobe_trace_func(). This is problematic because __uprobe_trace_func() is called inside rcu_read_lock()/rcu_read_unlock() block, which then calls prepare_uprobe_buffer() -> uprobe_buffer_get() -> mutex_lock(&ucb->mutex), leading to a splat about using mutex under non-sleepable RCU: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 98231, name: stress-ng-sigq preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 ... Call Trace: dump_stack_lvl+0x3d/0xe0 __might_resched+0x24c/0x270 ? prepare_uprobe_buffer+0xd5/0x1d0 __mutex_lock+0x41/0x820 ? ___perf_sw_event+0x206/0x290 ? __perf_event_task_sched_in+0x54/0x660 ? __perf_event_task_sched_in+0x54/0x660 prepare_uprobe_buffer+0xd5/0x1d0 __uprobe_trace_func+0x4a/0x140 uprobe_dispatcher+0x135/0x280 ? uprobe_dispatcher+0x94/0x280 uprobe_notify_resume+0x650/0xec0 ? atomic_notifier_call_chain+0x21/0x110 ? atomic_notifier_call_chain+0xf8/0x110 irqentry_exit_to_user_mode+0xe2/0x1e0 asm_exc_int3+0x35/0x40 RIP: 0033:0x7f7e1d4da390 Code: 33 04 00 0f 1f 80 00 00 00 00 f3 0f 1e fa b9 01 00 00 00 e9 b2 fc ff ff 66 90 f3 0f 1e fa 31 c9 e9 a5 fc ff ff 0f 1f 44 00 00 0f 1e fa b8 27 00 00 00 0f 05 c3 0f 1f 40 00 f3 0f 1e fa b8 6e RSP: 002b:00007ffd2abc3608 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000076d325f1 RCX: 0000000000000000 RDX: 0000000076d325f1 RSI: 000000000000000a RDI: 00007ffd2abc3690 RBP: 000000000000000a R08: 00017fb700000000 R09: 00017fb700000000 R10: 00017fb700000000 R11: 0000000000000246 R12: 0000000000017ff2 R13: 00007ffd2abc3610 R14: 0000000000000000 R15: 00007ffd2abc3780 Luckily, it's easy to fix by moving prepare_uprobe_buffer() to be called slightly earlier: into uprobe_trace_func() and uretprobe_trace_func(), outside of RCU locked section. This still keeps this buffer preparation lazy and helps avoid the overhead when it's not needed. E.g., if there is only BPF uprobe handler installed on a given uprobe, buffer won't be initialized. Note, the other user of prepare_uprobe_buffer(), __uprobe_perf_func(), is not affected, as it doesn't prepare buffer under RCU read lock. Link: https://lore.kernel.org/all/20240521053017.3708530-1-andrii@kernel.org/ Fixes: 1b8f85defbc8 ("uprobes: prepare uprobe args buffer lazily") Reported-by: Breno Leitao Signed-off-by: Andrii Nakryiko Signed-off-by: Masami Hiramatsu (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin --- kernel/trace/trace_uprobe.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 6c2ab0e316d6..0d52588329b2 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -970,19 +970,17 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer **ucbp, + struct uprobe_cpu_buffer *ucb, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; struct trace_event_buffer fbuffer; - struct uprobe_cpu_buffer *ucb; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); WARN_ON(call != trace_file->event_call); - ucb = prepare_uprobe_buffer(tu, regs, ucbp); if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) return; @@ -1014,13 +1012,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; + struct uprobe_cpu_buffer *ucb; if (is_ret_probe(tu)) return 0; + ucb = prepare_uprobe_buffer(tu, regs, ucbp); + rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, 0, regs, ucbp, link->file); + __uprobe_trace_func(tu, 0, regs, ucb, link->file); rcu_read_unlock(); return 0; @@ -1031,10 +1032,13 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; + struct uprobe_cpu_buffer *ucb; + + ucb = prepare_uprobe_buffer(tu, regs, ucbp); rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, func, regs, ucbp, link->file); + __uprobe_trace_func(tu, func, regs, ucb, link->file); rcu_read_unlock(); } From 9e5f93788c9dd4309e75a56860a1ac44a8e117b9 Mon Sep 17 00:00:00 2001 From: Qiao Ma Date: Tue, 15 Oct 2024 14:01:48 +0800 Subject: [PATCH 113/207] uprobe: avoid out-of-bounds memory access of fetching args [ Upstream commit 373b9338c9722a368925d83bc622c596896b328e ] Uprobe needs to fetch args into a percpu buffer, and then copy to ring buffer to avoid non-atomic context problem. Sometimes user-space strings, arrays can be very large, but the size of percpu buffer is only page size. And store_trace_args() won't check whether these data exceeds a single page or not, caused out-of-bounds memory access. It could be reproduced by following steps: 1. build kernel with CONFIG_KASAN enabled 2. save follow program as test.c ``` \#include \#include \#include // If string length large than MAX_STRING_SIZE, the fetch_store_strlen() // will return 0, cause __get_data_size() return shorter size, and // store_trace_args() will not trigger out-of-bounds access. // So make string length less than 4096. \#define STRLEN 4093 void generate_string(char *str, int n) { int i; for (i = 0; i < n; ++i) { char c = i % 26 + 'a'; str[i] = c; } str[n-1] = '\0'; } void print_string(char *str) { printf("%s\n", str); } int main() { char tmp[STRLEN]; generate_string(tmp, STRLEN); print_string(tmp); return 0; } ``` 3. compile program `gcc -o test test.c` 4. get the offset of `print_string()` ``` objdump -t test | grep -w print_string 0000000000401199 g F .text 000000000000001b print_string ``` 5. configure uprobe with offset 0x1199 ``` off=0x1199 cd /sys/kernel/debug/tracing/ echo "p /root/test:${off} arg1=+0(%di):ustring arg2=\$comm arg3=+0(%di):ustring" > uprobe_events echo 1 > events/uprobes/enable echo 1 > tracing_on ``` 6. run `test`, and kasan will report error. ================================================================== BUG: KASAN: use-after-free in strncpy_from_user+0x1d6/0x1f0 Write of size 8 at addr ffff88812311c004 by task test/499CPU: 0 UID: 0 PID: 499 Comm: test Not tainted 6.12.0-rc3+ #18 Hardware name: Red Hat KVM, BIOS 1.16.0-4.al8 04/01/2014 Call Trace: dump_stack_lvl+0x55/0x70 print_address_description.constprop.0+0x27/0x310 kasan_report+0x10f/0x120 ? strncpy_from_user+0x1d6/0x1f0 strncpy_from_user+0x1d6/0x1f0 ? rmqueue.constprop.0+0x70d/0x2ad0 process_fetch_insn+0xb26/0x1470 ? __pfx_process_fetch_insn+0x10/0x10 ? _raw_spin_lock+0x85/0xe0 ? __pfx__raw_spin_lock+0x10/0x10 ? __pte_offset_map+0x1f/0x2d0 ? unwind_next_frame+0xc5f/0x1f80 ? arch_stack_walk+0x68/0xf0 ? is_bpf_text_address+0x23/0x30 ? kernel_text_address.part.0+0xbb/0xd0 ? __kernel_text_address+0x66/0xb0 ? unwind_get_return_address+0x5e/0xa0 ? __pfx_stack_trace_consume_entry+0x10/0x10 ? arch_stack_walk+0xa2/0xf0 ? _raw_spin_lock_irqsave+0x8b/0xf0 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? depot_alloc_stack+0x4c/0x1f0 ? _raw_spin_unlock_irqrestore+0xe/0x30 ? stack_depot_save_flags+0x35d/0x4f0 ? kasan_save_stack+0x34/0x50 ? kasan_save_stack+0x24/0x50 ? mutex_lock+0x91/0xe0 ? __pfx_mutex_lock+0x10/0x10 prepare_uprobe_buffer.part.0+0x2cd/0x500 uprobe_dispatcher+0x2c3/0x6a0 ? __pfx_uprobe_dispatcher+0x10/0x10 ? __kasan_slab_alloc+0x4d/0x90 handler_chain+0xdd/0x3e0 handle_swbp+0x26e/0x3d0 ? __pfx_handle_swbp+0x10/0x10 ? uprobe_pre_sstep_notifier+0x151/0x1b0 irqentry_exit_to_user_mode+0xe2/0x1b0 asm_exc_int3+0x39/0x40 RIP: 0033:0x401199 Code: 01 c2 0f b6 45 fb 88 02 83 45 fc 01 8b 45 fc 3b 45 e4 7c b7 8b 45 e4 48 98 48 8d 50 ff 48 8b 45 e8 48 01 d0 ce RSP: 002b:00007ffdf00576a8 EFLAGS: 00000206 RAX: 00007ffdf00576b0 RBX: 0000000000000000 RCX: 0000000000000ff2 RDX: 0000000000000ffc RSI: 0000000000000ffd RDI: 00007ffdf00576b0 RBP: 00007ffdf00586b0 R08: 00007feb2f9c0d20 R09: 00007feb2f9c0d20 R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000401040 R13: 00007ffdf0058780 R14: 0000000000000000 R15: 0000000000000000 This commit enforces the buffer's maxlen less than a page-size to avoid store_trace_args() out-of-memory access. Link: https://lore.kernel.org/all/20241015060148.1108331-1-mqaio@linux.alibaba.com/ Fixes: dcad1a204f72 ("tracing/uprobes: Fetch args before reserving a ring buffer") Signed-off-by: Qiao Ma Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_uprobe.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 0d52588329b2..d72ac9dde532 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -858,6 +858,7 @@ struct uprobe_cpu_buffer { }; static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; static int uprobe_buffer_refcnt; +#define MAX_UCB_BUFFER_SIZE PAGE_SIZE static int uprobe_buffer_init(void) { @@ -962,6 +963,11 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, ucb = uprobe_buffer_get(); ucb->dsize = tu->tp.size + dsize; + if (WARN_ON_ONCE(ucb->dsize > MAX_UCB_BUFFER_SIZE)) { + ucb->dsize = MAX_UCB_BUFFER_SIZE; + dsize = MAX_UCB_BUFFER_SIZE - tu->tp.size; + } + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); *ucbp = ucb; @@ -981,9 +987,6 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, WARN_ON(call != trace_file->event_call); - if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) - return; - if (trace_trigger_soft_disabled(trace_file)) return; From 0d16f53c91111cec914f0811fcc526a2ba77b20d Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 22 Oct 2024 15:44:58 -0300 Subject: [PATCH 114/207] exec: don't WARN for racy path_noexec check [ Upstream commit 0d196e7589cefe207d5d41f37a0a28a1fdeeb7c6 ] Both i_mode and noexec checks wrapped in WARN_ON stem from an artifact of the previous implementation. They used to legitimately check for the condition, but that got moved up in two commits: 633fb6ac3980 ("exec: move S_ISREG() check earlier") 0fd338b2d2cd ("exec: move path_noexec() check earlier") Instead of being removed said checks are WARN_ON'ed instead, which has some debug value. However, the spurious path_noexec check is racy, resulting in unwarranted warnings should someone race with setting the noexec flag. One can note there is more to perm-checking whether execve is allowed and none of the conditions are guaranteed to still hold after they were tested for. Additionally this does not validate whether the code path did any perm checking to begin with -- it will pass if the inode happens to be regular. Keep the redundant path_noexec() check even though it's mindless nonsense checking for guarantee that isn't given so drop the WARN. Reword the commentary and do small tidy ups while here. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240805131721.765484-1-mjguzik@gmail.com [brauner: keep redundant path_noexec() check] Signed-off-by: Christian Brauner [cascardo: keep exit label and use it] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Sasha Levin --- fs/exec.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index f49b352a6032..7776209d98c1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -143,13 +143,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto out; /* - * may_open() has already checked for this, so it should be - * impossible to trip now. But we need to be extra cautious - * and check again at the very end too. + * Check do_open_execat() for an explanation. */ error = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || - path_noexec(&file->f_path))) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || + path_noexec(&file->f_path)) goto exit; error = -ENOEXEC; @@ -925,23 +923,22 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) file = do_filp_open(fd, name, &open_exec_flags); if (IS_ERR(file)) - goto out; + return file; /* - * may_open() has already checked for this, so it should be - * impossible to trip now. But we need to be extra cautious - * and check again at the very end too. + * In the past the regular type check was here. It moved to may_open() in + * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is + * an invariant that all non-regular files error out before we get here. */ err = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || - path_noexec(&file->f_path))) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || + path_noexec(&file->f_path)) goto exit; err = deny_write_access(file); if (err) goto exit; -out: return file; exit: From 9eb32bd23bbcec44bcbef27b7f282b7a7f3d0391 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 27 Aug 2024 12:45:23 +0200 Subject: [PATCH 115/207] drm/vboxvideo: Replace fake VLA at end of vbva_mouse_pointer_shape with real VLA [ Upstream commit d92b90f9a54d9300a6e883258e79f36dab53bfae ] Replace the fake VLA at end of the vbva_mouse_pointer_shape shape with a real VLA to fix a "memcpy: detected field-spanning write error" warning: [ 13.319813] memcpy: detected field-spanning write (size 16896) of single field "p->data" at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 (size 4) [ 13.319841] WARNING: CPU: 0 PID: 1105 at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 hgsmi_update_pointer_shape+0x192/0x1c0 [vboxvideo] [ 13.320038] Call Trace: [ 13.320173] hgsmi_update_pointer_shape [vboxvideo] [ 13.320184] vbox_cursor_atomic_update [vboxvideo] Note as mentioned in the added comment it seems the original length calculation for the allocated and send hgsmi buffer is 4 bytes too large. Changing this is not the goal of this patch, so this behavior is kept. Signed-off-by: Hans de Goede Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240827104523.17442-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vboxvideo/hgsmi_base.c | 10 +++++++++- drivers/gpu/drm/vboxvideo/vboxvideo.h | 4 +--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vboxvideo/hgsmi_base.c b/drivers/gpu/drm/vboxvideo/hgsmi_base.c index 8c041d7ce4f1..87dccaecc3e5 100644 --- a/drivers/gpu/drm/vboxvideo/hgsmi_base.c +++ b/drivers/gpu/drm/vboxvideo/hgsmi_base.c @@ -139,7 +139,15 @@ int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags, flags |= VBOX_MOUSE_POINTER_VISIBLE; } - p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len, HGSMI_CH_VBVA, + /* + * The 4 extra bytes come from switching struct vbva_mouse_pointer_shape + * from having a 4 bytes fixed array at the end to using a proper VLA + * at the end. These 4 extra bytes were not subtracted from sizeof(*p) + * before the switch to the VLA, so this way the behavior is unchanged. + * Chances are these 4 extra bytes are not necessary but they are kept + * to avoid regressions. + */ + p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len + 4, HGSMI_CH_VBVA, VBVA_MOUSE_POINTER_SHAPE); if (!p) return -ENOMEM; diff --git a/drivers/gpu/drm/vboxvideo/vboxvideo.h b/drivers/gpu/drm/vboxvideo/vboxvideo.h index f60d82504da0..79ec8481de0e 100644 --- a/drivers/gpu/drm/vboxvideo/vboxvideo.h +++ b/drivers/gpu/drm/vboxvideo/vboxvideo.h @@ -351,10 +351,8 @@ struct vbva_mouse_pointer_shape { * Bytes in the gap between the AND and the XOR mask are undefined. * XOR mask scanlines have no gap between them and size of XOR mask is: * xor_len = width * 4 * height. - * - * Preallocate 4 bytes for accessing actual data as p->data. */ - u8 data[4]; + u8 data[]; } __packed; /* pointer is visible */ From b3d4f2329312855bb4e8877cf47f28ef9de4fcc6 Mon Sep 17 00:00:00 2001 From: David Lawrence Glanzman Date: Tue, 17 Sep 2024 00:44:08 -0400 Subject: [PATCH 116/207] ASoC: amd: yc: Add quirk for HP Dragonfly pro one [ Upstream commit 84e8d59651879b2ff8499bddbbc9549b7f1a646b ] Adds a quirk entry to enable the mic on HP Dragonfly pro one laptop Signed-off-by: David Lawrence Glanzman Link: https://patch.msgid.link/1249c09bd6bf696b59d087a4f546ae397828656c.camel@yahoo.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 248e3bcbf386..76f5d926d1ea 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -444,6 +444,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A3E"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A7F"), + } + }, { .driver_data = &acp6x_card, .matches = { From 935e5c5f93dfb2a090c5408103d5814876ffb885 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 25 Sep 2024 05:38:23 +0100 Subject: [PATCH 117/207] ASoC: codecs: lpass-rx-macro: add missing CDC_RX_BCL_VBAT_RF_PROC2 to default regs values [ Upstream commit e249786b2188107a7c50e7174d35f955a60988a1 ] CDC_RX_BCL_VBAT_RF_PROC1 is listed twice and its default value is 0x2a which is overwriten by its next occurence in rx_defaults[]. The second one should be missing CDC_RX_BCL_VBAT_RF_PROC2 instead and its default value is expected 0x0. Signed-off-by: Alexey Klimov Link: https://patch.msgid.link/20240925043823.520218-2-alexey.klimov@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/lpass-rx-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 29197d34ec09..7e9f0ab91412 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -909,7 +909,7 @@ static const struct reg_default rx_defaults[] = { { CDC_RX_BCL_VBAT_PK_EST2, 0x01 }, { CDC_RX_BCL_VBAT_PK_EST3, 0x40 }, { CDC_RX_BCL_VBAT_RF_PROC1, 0x2A }, - { CDC_RX_BCL_VBAT_RF_PROC1, 0x00 }, + { CDC_RX_BCL_VBAT_RF_PROC2, 0x00 }, { CDC_RX_BCL_VBAT_TAC1, 0x00 }, { CDC_RX_BCL_VBAT_TAC2, 0x18 }, { CDC_RX_BCL_VBAT_TAC3, 0x18 }, From 85e70dcd1f8557ff7fa68d260c1806d5c32099a8 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 30 Sep 2024 14:08:28 +0800 Subject: [PATCH 118/207] ASoC: fsl_sai: Enable 'FIFO continue on error' FCONT bit [ Upstream commit 72455e33173c1a00c0ce93d2b0198eb45d5f4195 ] FCONT=1 means On FIFO error, the SAI will continue from the same word that caused the FIFO error to set after the FIFO warning flag has been cleared. Set FCONT bit in control register to avoid the channel swap issue after SAI xrun. Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/1727676508-22830-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_sai.c | 5 ++++- sound/soc/fsl/fsl_sai.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 3d202398c541..aa15f56ca139 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -604,6 +604,9 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr4 |= FSL_SAI_CR4_FRSZ(slots); + /* Set to avoid channel swap */ + val_cr4 |= FSL_SAI_CR4_FCONT; + /* Set to output mode to avoid tri-stated data pins */ if (tx) val_cr4 |= FSL_SAI_CR4_CHMOD; @@ -690,7 +693,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | - FSL_SAI_CR4_CHMOD_MASK, + FSL_SAI_CR4_CHMOD_MASK | FSL_SAI_CR4_FCONT_MASK, val_cr4); regmap_update_bits(sai->regmap, FSL_SAI_xCR5(tx, ofs), FSL_SAI_CR5_WNW_MASK | FSL_SAI_CR5_W0W_MASK | diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 550df87b6a06..eba465c2387b 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -137,6 +137,7 @@ /* SAI Transmit and Receive Configuration 4 Register */ +#define FSL_SAI_CR4_FCONT_MASK BIT(28) #define FSL_SAI_CR4_FCONT BIT(28) #define FSL_SAI_CR4_FCOMB_SHIFT BIT(26) #define FSL_SAI_CR4_FCOMB_SOFT BIT(27) From 8c213cc2172485a1f3bba5369bf478b4d8a5d0fd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 27 Sep 2024 11:18:38 +0100 Subject: [PATCH 119/207] arm64: Force position-independent veneers [ Upstream commit 9abe390e689f4f5c23c5f507754f8678431b4f72 ] Certain portions of code always need to be position-independent regardless of CONFIG_RELOCATABLE, including code which is executed in an idmap or which is executed before relocations are applied. In some kernel configurations the LLD linker generates position-dependent veneers for such code, and when executed these result in early boot-time failures. Marc Zyngier encountered a boot failure resulting from this when building a (particularly cursed) configuration with LLVM, as he reported to the list: https://lore.kernel.org/linux-arm-kernel/86wmjwvatn.wl-maz@kernel.org/ In Marc's kernel configuration, the .head.text and .rodata.text sections end up more than 128MiB apart, requiring a veneer to branch between the two: | [mark@lakrids:~/src/linux]% usekorg 14.1.0 aarch64-linux-objdump -t vmlinux | grep -w _text | ffff800080000000 g .head.text 0000000000000000 _text | [mark@lakrids:~/src/linux]% usekorg 14.1.0 aarch64-linux-objdump -t vmlinux | grep -w primary_entry | ffff8000889df0e0 g .rodata.text 000000000000006c primary_entry, ... consequently, LLD inserts a position-dependent veneer for the branch from _stext (in .head.text) to primary_entry (in .rodata.text): | ffff800080000000 <_text>: | ffff800080000000: fa405a4d ccmp x18, #0x0, #0xd, pl // pl = nfrst | ffff800080000004: 14003fff b ffff800080010000 <__AArch64AbsLongThunk_primary_entry> ... | ffff800080010000 <__AArch64AbsLongThunk_primary_entry>: | ffff800080010000: 58000050 ldr x16, ffff800080010008 <__AArch64AbsLongThunk_primary_entry+0x8> | ffff800080010004: d61f0200 br x16 | ffff800080010008: 889df0e0 .word 0x889df0e0 | ffff80008001000c: ffff8000 .word 0xffff8000 ... and as this is executed early in boot before the kernel is mapped in TTBR1 this results in a silent boot failure. Fix this by passing '--pic-veneer' to the linker, which will cause the linker to use position-independent veneers, e.g. | ffff800080000000 <_text>: | ffff800080000000: fa405a4d ccmp x18, #0x0, #0xd, pl // pl = nfrst | ffff800080000004: 14003fff b ffff800080010000 <__AArch64ADRPThunk_primary_entry> ... | ffff800080010000 <__AArch64ADRPThunk_primary_entry>: | ffff800080010000: f004e3f0 adrp x16, ffff800089c8f000 <__idmap_text_start> | ffff800080010004: 91038210 add x16, x16, #0xe0 | ffff800080010008: d61f0200 br x16 I've opted to pass '--pic-veneer' unconditionally, as: * In addition to solving the boot failure, these sequences are generally nicer as they require fewer instructions and don't need to perform data accesses. * While the position-independent veneer sequences have a limited +/-2GiB range, this is not a new restriction. Even kernels built with CONFIG_RELOCATABLE=n are limited to 2GiB in size as we have several structues using 32-bit relative offsets and PPREL32 relocations, which are similarly limited to +/-2GiB in range. These include extable entries, jump table entries, and alt_instr entries. * GNU LD defaults to using position-independent veneers, and supports the same '--pic-veneer' option, so this change is not expected to adversely affect GNU LD. I've tested with GNU LD 2.30 to 2.42 inclusive and LLVM 13.0.1 to 19.1.0 inclusive, using the kernel.org binaries from: * https://mirrors.edge.kernel.org/pub/tools/crosstool/ * https://mirrors.edge.kernel.org/pub/tools/llvm/ Signed-off-by: Mark Rutland Reported-by: Marc Zyngier Cc: Ard Biesheuvel Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Will Deacon Acked-by: Ard Biesheuvel Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240927101838.3061054-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9a2d3723cd0f..11782860717f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X --pic-veneer ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour From 402c8862fe54f7999523b4251a68665209610921 Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 1 Oct 2024 19:54:23 +0800 Subject: [PATCH 120/207] udf: refactor udf_current_aext() to handle error [ Upstream commit ee703a7068f95764cfb62b57db1d36e465cb9b26 ] As Jan suggested in links below, refactor udf_current_aext() to differentiate between error, hit EOF and success, it now takes pointer to etype to store the extent type, return 1 when getting etype success, return 0 when hitting EOF and return -errno when err. Link: https://lore.kernel.org/all/20240912111235.6nr3wuqvktecy3vh@quack3/ Signed-off-by: Zhao Mengmeng Suggested-by: Jan Kara Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241001115425.266556-2-zhaomzhao@126.com Signed-off-by: Sasha Levin --- fs/udf/inode.c | 40 ++++++++++++++++++++++++++-------------- fs/udf/truncate.c | 10 ++++++++-- fs/udf/udfdecl.h | 5 +++-- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8db07d1f56bc..911be5bcb98e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1953,6 +1953,7 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, struct extent_position nepos; struct kernel_lb_addr neloc; int ver, adsize; + int err = 0; if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -1997,10 +1998,12 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, if (epos->offset + adsize > sb->s_blocksize) { struct kernel_lb_addr cp_loc; uint32_t cp_len; - int cp_type; + int8_t cp_type; epos->offset -= adsize; - cp_type = udf_current_aext(inode, epos, &cp_loc, &cp_len, 0); + err = udf_current_aext(inode, epos, &cp_loc, &cp_len, &cp_type, 0); + if (err <= 0) + goto err_out; cp_len |= ((uint32_t)cp_type) << 30; __udf_add_aext(inode, &nepos, &cp_loc, cp_len, 1); @@ -2015,6 +2018,9 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, *epos = nepos; return 0; +err_out: + brelse(bh); + return err; } /* @@ -2165,9 +2171,12 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, { int8_t etype; unsigned int indirections = 0; + int ret = 0; - while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == - (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) { + while ((ret = udf_current_aext(inode, epos, eloc, elen, + &etype, inc)) > 0) { + if (etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) + break; udf_pblk_t block; if (++indirections > UDF_MAX_INDIR_EXTS) { @@ -2188,14 +2197,17 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, } } - return etype; + return ret > 0 ? etype : -1; } -int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, - struct kernel_lb_addr *eloc, uint32_t *elen, int inc) +/* + * Returns 1 on success, -errno on error, 0 on hit EOF. + */ +int udf_current_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype, + int inc) { int alen; - int8_t etype; uint8_t *ptr; struct short_ad *sad; struct long_ad *lad; @@ -2222,8 +2234,8 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, case ICBTAG_FLAG_AD_SHORT: sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc); if (!sad) - return -1; - etype = le32_to_cpu(sad->extLength) >> 30; + return 0; + *etype = le32_to_cpu(sad->extLength) >> 30; eloc->logicalBlockNum = le32_to_cpu(sad->extPosition); eloc->partitionReferenceNum = iinfo->i_location.partitionReferenceNum; @@ -2232,17 +2244,17 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, case ICBTAG_FLAG_AD_LONG: lad = udf_get_filelongad(ptr, alen, &epos->offset, inc); if (!lad) - return -1; - etype = le32_to_cpu(lad->extLength) >> 30; + return 0; + *etype = le32_to_cpu(lad->extLength) >> 30; *eloc = lelb_to_cpu(lad->extLocation); *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; break; default: udf_debug("alloc_type = %u unsupported\n", iinfo->i_alloc_type); - return -1; + return -EINVAL; } - return etype; + return 1; } static int udf_insert_aext(struct inode *inode, struct extent_position epos, diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index a686c10fd709..4758ba7b5f51 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -188,6 +188,7 @@ int udf_truncate_extents(struct inode *inode) loff_t byte_offset; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); + int ret = 0; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -217,8 +218,8 @@ int udf_truncate_extents(struct inode *inode) else lenalloc -= sizeof(struct allocExtDesc); - while ((etype = udf_current_aext(inode, &epos, &eloc, - &elen, 0)) != -1) { + while ((ret = udf_current_aext(inode, &epos, &eloc, + &elen, &etype, 0)) > 0) { if (etype == (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) { udf_write_aext(inode, &epos, &neloc, nelen, 0); if (indirect_ext_len) { @@ -253,6 +254,11 @@ int udf_truncate_extents(struct inode *inode) } } + if (ret < 0) { + brelse(epos.bh); + return ret; + } + if (indirect_ext_len) { BUG_ON(!epos.bh); udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len); diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 88692512a466..d893db95ac70 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -171,8 +171,9 @@ extern void udf_write_aext(struct inode *, struct extent_position *, extern int8_t udf_delete_aext(struct inode *, struct extent_position); extern int8_t udf_next_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t *, int); -extern int8_t udf_current_aext(struct inode *, struct extent_position *, - struct kernel_lb_addr *, uint32_t *, int); +extern int udf_current_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, + int8_t *etype, int inc); extern void udf_update_extra_perms(struct inode *inode, umode_t mode); /* misc.c */ From 5fc8da4d326d88c12924996b2bec85a194fe27f9 Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 1 Oct 2024 19:54:24 +0800 Subject: [PATCH 121/207] udf: refactor udf_next_aext() to handle error [ Upstream commit b405c1e58b73981da0f8df03b00666b22b9397ae ] Since udf_current_aext() has error handling, udf_next_aext() should have error handling too. Besides, when too many indirect extents found in one inode, return -EFSCORRUPTED; when reading block failed, return -EIO. Signed-off-by: Zhao Mengmeng Suggested-by: Jan Kara Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241001115425.266556-3-zhaomzhao@126.com Signed-off-by: Sasha Levin --- fs/udf/balloc.c | 38 +++++++++----- fs/udf/directory.c | 10 +++- fs/udf/inode.c | 125 ++++++++++++++++++++++++++++++--------------- fs/udf/super.c | 3 +- fs/udf/truncate.c | 27 ++++++++-- fs/udf/udfdecl.h | 5 +- 6 files changed, 143 insertions(+), 65 deletions(-) diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index bb471ec36404..f5de2030e769 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -387,6 +387,7 @@ static void udf_table_free_blocks(struct super_block *sb, struct extent_position oepos, epos; int8_t etype; struct udf_inode_info *iinfo; + int ret = 0; mutex_lock(&sbi->s_alloc_mutex); iinfo = UDF_I(table); @@ -400,8 +401,12 @@ static void udf_table_free_blocks(struct super_block *sb, epos.block = oepos.block = iinfo->i_location; epos.bh = oepos.bh = NULL; - while (count && - (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while (count) { + ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) + goto error_return; + if (ret == 0) + break; if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == start)) { if ((0x3FFFFFFF - elen) < @@ -476,11 +481,8 @@ static void udf_table_free_blocks(struct super_block *sb, adsize = sizeof(struct short_ad); else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) adsize = sizeof(struct long_ad); - else { - brelse(oepos.bh); - brelse(epos.bh); + else goto error_return; - } if (epos.offset + (2 * adsize) > sb->s_blocksize) { /* Steal a block from the extent being free'd */ @@ -496,10 +498,10 @@ static void udf_table_free_blocks(struct super_block *sb, __udf_add_aext(table, &epos, &eloc, elen, 1); } +error_return: brelse(epos.bh); brelse(oepos.bh); -error_return: mutex_unlock(&sbi->s_alloc_mutex); return; } @@ -515,6 +517,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, struct extent_position epos; int8_t etype = -1; struct udf_inode_info *iinfo; + int ret = 0; if (first_block >= sbi->s_partmaps[partition].s_partition_len) return 0; @@ -533,11 +536,14 @@ static int udf_table_prealloc_blocks(struct super_block *sb, epos.bh = NULL; eloc.logicalBlockNum = 0xFFFFFFFF; - while (first_block != eloc.logicalBlockNum && - (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while (first_block != eloc.logicalBlockNum) { + ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) + goto err_out; + if (ret == 0) + break; udf_debug("eloc=%u, elen=%u, first_block=%u\n", eloc.logicalBlockNum, elen, first_block); - ; /* empty loop body */ } if (first_block == eloc.logicalBlockNum) { @@ -556,6 +562,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, alloc_count = 0; } +err_out: brelse(epos.bh); if (alloc_count) @@ -577,6 +584,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, struct extent_position epos, goal_epos; int8_t etype; struct udf_inode_info *iinfo = UDF_I(table); + int ret = 0; *err = -ENOSPC; @@ -600,8 +608,10 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, epos.block = iinfo->i_location; epos.bh = goal_epos.bh = NULL; - while (spread && - (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + while (spread) { + ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1); + if (ret <= 0) + break; if (goal >= eloc.logicalBlockNum) { if (goal < eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) @@ -629,9 +639,11 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, brelse(epos.bh); - if (spread == 0xFFFFFFFF) { + if (ret < 0 || spread == 0xFFFFFFFF) { brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); + if (ret < 0) + *err = ret; return 0; } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 93153665eb37..c6950050e7ae 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -166,13 +166,19 @@ static struct buffer_head *udf_fiiter_bread_blk(struct udf_fileident_iter *iter) */ static int udf_fiiter_advance_blk(struct udf_fileident_iter *iter) { + int8_t etype = -1; + int err = 0; + iter->loffset++; if (iter->loffset < DIV_ROUND_UP(iter->elen, 1<dir->i_blkbits)) return 0; iter->loffset = 0; - if (udf_next_aext(iter->dir, &iter->epos, &iter->eloc, &iter->elen, 1) - != (EXT_RECORDED_ALLOCATED >> 30)) { + err = udf_next_aext(iter->dir, &iter->epos, &iter->eloc, + &iter->elen, &etype, 1); + if (err < 0) + return err; + else if (err == 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) { if (iter->pos == iter->dir->i_size) { iter->elen = 0; return 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 911be5bcb98e..f7623b49ec34 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -547,6 +547,7 @@ static int udf_do_extend_file(struct inode *inode, } else { struct kernel_lb_addr tmploc; uint32_t tmplen; + int8_t tmptype; udf_write_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); @@ -556,8 +557,12 @@ static int udf_do_extend_file(struct inode *inode, * more extents, we may need to enter possible following * empty indirect extent. */ - if (new_block_bytes) - udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0); + if (new_block_bytes) { + err = udf_next_aext(inode, last_pos, &tmploc, &tmplen, + &tmptype, 0); + if (err < 0) + goto out_err; + } } iinfo->i_lenExtents += add; @@ -676,8 +681,10 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; } else { epos.offset -= adsize; - etype = udf_next_aext(inode, &epos, &extent.extLocation, - &extent.extLength, 0); + err = udf_next_aext(inode, &epos, &extent.extLocation, + &extent.extLength, &etype, 0); + if (err <= 0) + goto out; extent.extLength |= etype << 30; } @@ -714,11 +721,11 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) loff_t lbcount = 0, b_off = 0; udf_pblk_t newblocknum; sector_t offset = 0; - int8_t etype; + int8_t etype, tmpetype; struct udf_inode_info *iinfo = UDF_I(inode); udf_pblk_t goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; - bool isBeyondEOF; + bool isBeyondEOF = false; int ret = 0; prev_epos.offset = udf_file_entry_alloc_offset(inode); @@ -750,9 +757,13 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) prev_epos.offset = cur_epos.offset; cur_epos.offset = next_epos.offset; - etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1); - if (etype == -1) + ret = udf_next_aext(inode, &next_epos, &eloc, &elen, &etype, 1); + if (ret < 0) { + goto out_free; + } else if (ret == 0) { + isBeyondEOF = true; break; + } c = !c; @@ -773,13 +784,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) * Move prev_epos and cur_epos into indirect extent if we are at * the pointer to it */ - udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0); - udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0); + ret = udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, &tmpetype, 0); + if (ret < 0) + goto out_free; + ret = udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, &tmpetype, 0); + if (ret < 0) + goto out_free; /* if the extent is allocated and recorded, return the block if the extent is not a multiple of the blocksize, round up */ - if (etype == (EXT_RECORDED_ALLOCATED >> 30)) { + if (!isBeyondEOF && etype == (EXT_RECORDED_ALLOCATED >> 30)) { if (elen & (inode->i_sb->s_blocksize - 1)) { elen = EXT_RECORDED_ALLOCATED | ((elen + inode->i_sb->s_blocksize - 1) & @@ -795,10 +810,9 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) } /* Are we beyond EOF and preallocated extent? */ - if (etype == -1) { + if (isBeyondEOF) { loff_t hole_len; - isBeyondEOF = true; if (count) { if (c) laarr[0] = laarr[1]; @@ -834,7 +848,6 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) endnum = c + 1; lastblock = 1; } else { - isBeyondEOF = false; endnum = startnum = ((count > 2) ? 2 : count); /* if the current extent is in position 0, @@ -848,15 +861,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map) /* if the current block is located in an extent, read the next extent */ - etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0); - if (etype != -1) { + ret = udf_next_aext(inode, &next_epos, &eloc, &elen, &etype, 0); + if (ret > 0) { laarr[c + 1].extLength = (etype << 30) | elen; laarr[c + 1].extLocation = eloc; count++; startnum++; endnum++; - } else + } else if (ret == 0) lastblock = 1; + else + goto out_free; } /* if the current extent is not recorded but allocated, get the @@ -1174,6 +1189,7 @@ static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr, int start = 0, i; struct kernel_lb_addr tmploc; uint32_t tmplen; + int8_t tmpetype; int err; if (startnum > endnum) { @@ -1191,14 +1207,19 @@ static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr, */ if (err < 0) return err; - udf_next_aext(inode, epos, &laarr[i].extLocation, - &laarr[i].extLength, 1); + err = udf_next_aext(inode, epos, &laarr[i].extLocation, + &laarr[i].extLength, &tmpetype, 1); + if (err < 0) + return err; start++; } } for (i = start; i < endnum; i++) { - udf_next_aext(inode, epos, &tmploc, &tmplen, 0); + err = udf_next_aext(inode, epos, &tmploc, &tmplen, &tmpetype, 0); + if (err < 0) + return err; + udf_write_aext(inode, epos, &laarr[i].extLocation, laarr[i].extLength, 1); } @@ -2166,24 +2187,30 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, */ #define UDF_MAX_INDIR_EXTS 16 -int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, - struct kernel_lb_addr *eloc, uint32_t *elen, int inc) +/* + * Returns 1 on success, -errno on error, 0 on hit EOF. + */ +int udf_next_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype, + int inc) { - int8_t etype; unsigned int indirections = 0; int ret = 0; + udf_pblk_t block; - while ((ret = udf_current_aext(inode, epos, eloc, elen, - &etype, inc)) > 0) { - if (etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) - break; - udf_pblk_t block; + while (1) { + ret = udf_current_aext(inode, epos, eloc, elen, + etype, inc); + if (ret <= 0) + return ret; + if (*etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) + return ret; if (++indirections > UDF_MAX_INDIR_EXTS) { udf_err(inode->i_sb, "too many indirect extents in inode %lu\n", inode->i_ino); - return -1; + return -EFSCORRUPTED; } epos->block = *eloc; @@ -2193,11 +2220,9 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, epos->bh = sb_bread(inode->i_sb, block); if (!epos->bh) { udf_debug("reading block %u failed!\n", block); - return -1; + return -EIO; } } - - return ret > 0 ? etype : -1; } /* @@ -2263,20 +2288,24 @@ static int udf_insert_aext(struct inode *inode, struct extent_position epos, struct kernel_lb_addr oeloc; uint32_t oelen; int8_t etype; - int err; + int ret; if (epos.bh) get_bh(epos.bh); - while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &oeloc, &oelen, &etype, 0); + if (ret <= 0) + break; udf_write_aext(inode, &epos, &neloc, nelen, 1); neloc = oeloc; nelen = (etype << 30) | oelen; } - err = udf_add_aext(inode, &epos, &neloc, nelen, 1); + if (ret == 0) + ret = udf_add_aext(inode, &epos, &neloc, nelen, 1); brelse(epos.bh); - return err; + return ret; } int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) @@ -2288,6 +2317,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) struct udf_inode_info *iinfo; struct kernel_lb_addr eloc; uint32_t elen; + int ret; if (epos.bh) { get_bh(epos.bh); @@ -2303,10 +2333,18 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) adsize = 0; oepos = epos; - if (udf_next_aext(inode, &epos, &eloc, &elen, 1) == -1) + if (udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1) <= 0) return -1; - while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) { + brelse(epos.bh); + brelse(oepos.bh); + return -1; + } + if (ret == 0) + break; udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1); if (oepos.bh != epos.bh) { oepos.block = epos.block; @@ -2371,6 +2409,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block, loff_t lbcount = 0, bcount = (loff_t) block << blocksize_bits; int8_t etype; struct udf_inode_info *iinfo; + int err = 0; iinfo = UDF_I(inode); if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) { @@ -2380,10 +2419,12 @@ int8_t inode_bmap(struct inode *inode, sector_t block, } *elen = 0; do { - etype = udf_next_aext(inode, pos, eloc, elen, 1); - if (etype == -1) { - *offset = (bcount - lbcount) >> blocksize_bits; - iinfo->i_lenExtents = lbcount; + err = udf_next_aext(inode, pos, eloc, elen, &etype, 1); + if (err <= 0) { + if (err == 0) { + *offset = (bcount - lbcount) >> blocksize_bits; + iinfo->i_lenExtents = lbcount; + } return -1; } lbcount += *elen; diff --git a/fs/udf/super.c b/fs/udf/super.c index 3c78535f406b..20dff9ed2471 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2454,13 +2454,14 @@ static unsigned int udf_count_free_table(struct super_block *sb, uint32_t elen; struct kernel_lb_addr eloc; struct extent_position epos; + int8_t etype; mutex_lock(&UDF_SB(sb)->s_alloc_mutex); epos.block = UDF_I(table)->i_location; epos.offset = sizeof(struct unallocSpaceEntry); epos.bh = NULL; - while (udf_next_aext(table, &epos, &eloc, &elen, 1) != -1) + while (udf_next_aext(table, &epos, &eloc, &elen, &etype, 1) > 0) accum += (elen >> table->i_sb->s_blocksize_bits); brelse(epos.bh); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 4758ba7b5f51..399958f891d1 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -69,6 +69,7 @@ void udf_truncate_tail_extent(struct inode *inode) int8_t etype = -1, netype; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); + int ret; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || inode->i_size == iinfo->i_lenExtents) @@ -85,7 +86,10 @@ void udf_truncate_tail_extent(struct inode *inode) BUG(); /* Find the last extent in the file */ - while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &eloc, &elen, &netype, 1); + if (ret <= 0) + break; etype = netype; lbcount += elen; if (lbcount > inode->i_size) { @@ -101,7 +105,8 @@ void udf_truncate_tail_extent(struct inode *inode) epos.offset -= adsize; extent_trunc(inode, &epos, &eloc, etype, elen, nelen); epos.offset += adsize; - if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) + if (udf_next_aext(inode, &epos, &eloc, &elen, + &netype, 1) > 0) udf_err(inode->i_sb, "Extent after EOF in inode %u\n", (unsigned)inode->i_ino); @@ -110,7 +115,8 @@ void udf_truncate_tail_extent(struct inode *inode) } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ - iinfo->i_lenExtents = inode->i_size; + if (ret == 0) + iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); } @@ -124,6 +130,8 @@ void udf_discard_prealloc(struct inode *inode) int8_t etype = -1; struct udf_inode_info *iinfo = UDF_I(inode); int bsize = i_blocksize(inode); + int8_t tmpetype = -1; + int ret; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize)) @@ -132,15 +140,23 @@ void udf_discard_prealloc(struct inode *inode) epos.block = iinfo->i_location; /* Find the last extent in the file */ - while (udf_next_aext(inode, &epos, &eloc, &elen, 0) != -1) { + while (1) { + ret = udf_next_aext(inode, &epos, &eloc, &elen, &tmpetype, 0); + if (ret < 0) + goto out; + if (ret == 0) + break; brelse(prev_epos.bh); prev_epos = epos; if (prev_epos.bh) get_bh(prev_epos.bh); - etype = udf_next_aext(inode, &epos, &eloc, &elen, 1); + ret = udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1); + if (ret < 0) + goto out; lbcount += elen; } + if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { lbcount -= elen; udf_delete_aext(inode, prev_epos); @@ -150,6 +166,7 @@ void udf_discard_prealloc(struct inode *inode) /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ iinfo->i_lenExtents = lbcount; +out: brelse(epos.bh); brelse(prev_epos.bh); } diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index d893db95ac70..5067ed68a8b4 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -169,8 +169,9 @@ extern int udf_add_aext(struct inode *, struct extent_position *, extern void udf_write_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t, int); extern int8_t udf_delete_aext(struct inode *, struct extent_position); -extern int8_t udf_next_aext(struct inode *, struct extent_position *, - struct kernel_lb_addr *, uint32_t *, int); +extern int udf_next_aext(struct inode *inode, struct extent_position *epos, + struct kernel_lb_addr *eloc, uint32_t *elen, + int8_t *etype, int inc); extern int udf_current_aext(struct inode *inode, struct extent_position *epos, struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype, int inc); From 493447dd8336607fce426f7879e581095f6c606e Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 1 Oct 2024 19:54:25 +0800 Subject: [PATCH 122/207] udf: refactor inode_bmap() to handle error [ Upstream commit c226964ec786f3797ed389a16392ce4357697d24 ] Refactor inode_bmap() to handle error since udf_next_aext() can return error now. On situations like ftruncate, udf_extend_file() can now detect errors and bail out early without resorting to checking for particular offsets and assuming internal behavior of these functions. Reported-by: syzbot+7a4842f0b1801230a989@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7a4842f0b1801230a989 Tested-by: syzbot+7a4842f0b1801230a989@syzkaller.appspotmail.com Signed-off-by: Zhao Mengmeng Suggested-by: Jan Kara Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241001115425.266556-4-zhaomzhao@126.com Signed-off-by: Sasha Levin --- fs/udf/directory.c | 13 ++++++++----- fs/udf/inode.c | 40 +++++++++++++++++++++++++--------------- fs/udf/partition.c | 6 ++++-- fs/udf/truncate.c | 6 ++++-- fs/udf/udfdecl.h | 5 +++-- 5 files changed, 44 insertions(+), 26 deletions(-) diff --git a/fs/udf/directory.c b/fs/udf/directory.c index c6950050e7ae..632453aa3893 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -246,6 +246,7 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir, { struct udf_inode_info *iinfo = UDF_I(dir); int err = 0; + int8_t etype; iter->dir = dir; iter->bh[0] = iter->bh[1] = NULL; @@ -265,9 +266,9 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir, goto out; } - if (inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos, - &iter->eloc, &iter->elen, &iter->loffset) != - (EXT_RECORDED_ALLOCATED >> 30)) { + err = inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos, + &iter->eloc, &iter->elen, &iter->loffset, &etype); + if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) { if (pos == dir->i_size) return 0; udf_err(dir->i_sb, @@ -463,6 +464,7 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter) sector_t block; uint32_t old_elen = iter->elen; int err; + int8_t etype; if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)) return -EINVAL; @@ -477,8 +479,9 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter) udf_fiiter_update_elen(iter, old_elen); return err; } - if (inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen, - &iter->loffset) != (EXT_RECORDED_ALLOCATED >> 30)) { + err = inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen, + &iter->loffset, &etype); + if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) { udf_err(iter->dir->i_sb, "block %llu not allocated in directory (ino %lu)\n", (unsigned long long)block, iter->dir->i_ino); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index f7623b49ec34..37fa27136faf 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -408,7 +408,7 @@ struct udf_map_rq { static int udf_map_block(struct inode *inode, struct udf_map_rq *map) { - int err; + int ret; struct udf_inode_info *iinfo = UDF_I(inode); if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)) @@ -420,18 +420,24 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map) uint32_t elen; sector_t offset; struct extent_position epos = {}; + int8_t etype; down_read(&iinfo->i_data_sem); - if (inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset) - == (EXT_RECORDED_ALLOCATED >> 30)) { + ret = inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset, + &etype); + if (ret < 0) + goto out_read; + if (ret > 0 && etype == (EXT_RECORDED_ALLOCATED >> 30)) { map->pblk = udf_get_lb_pblock(inode->i_sb, &eloc, offset); map->oflags |= UDF_BLK_MAPPED; + ret = 0; } +out_read: up_read(&iinfo->i_data_sem); brelse(epos.bh); - return 0; + return ret; } down_write(&iinfo->i_data_sem); @@ -442,9 +448,9 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map) if (((loff_t)map->lblk) << inode->i_blkbits >= iinfo->i_lenExtents) udf_discard_prealloc(inode); udf_clear_extent_cache(inode); - err = inode_getblk(inode, map); + ret = inode_getblk(inode, map); up_write(&iinfo->i_data_sem); - return err; + return ret; } static int __udf_get_block(struct inode *inode, sector_t block, @@ -666,8 +672,10 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) */ udf_discard_prealloc(inode); - etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); - within_last_ext = (etype != -1); + err = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset, &etype); + if (err < 0) + goto out; + within_last_ext = (err == 1); /* We don't expect extents past EOF... */ WARN_ON_ONCE(within_last_ext && elen > ((loff_t)offset + 1) << inode->i_blkbits); @@ -2401,13 +2409,15 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) return (elen >> 30); } -int8_t inode_bmap(struct inode *inode, sector_t block, - struct extent_position *pos, struct kernel_lb_addr *eloc, - uint32_t *elen, sector_t *offset) +/* + * Returns 1 on success, -errno on error, 0 on hit EOF. + */ +int inode_bmap(struct inode *inode, sector_t block, struct extent_position *pos, + struct kernel_lb_addr *eloc, uint32_t *elen, sector_t *offset, + int8_t *etype) { unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; loff_t lbcount = 0, bcount = (loff_t) block << blocksize_bits; - int8_t etype; struct udf_inode_info *iinfo; int err = 0; @@ -2419,13 +2429,13 @@ int8_t inode_bmap(struct inode *inode, sector_t block, } *elen = 0; do { - err = udf_next_aext(inode, pos, eloc, elen, &etype, 1); + err = udf_next_aext(inode, pos, eloc, elen, etype, 1); if (err <= 0) { if (err == 0) { *offset = (bcount - lbcount) >> blocksize_bits; iinfo->i_lenExtents = lbcount; } - return -1; + return err; } lbcount += *elen; } while (lbcount <= bcount); @@ -2433,5 +2443,5 @@ int8_t inode_bmap(struct inode *inode, sector_t block, udf_update_extent_cache(inode, lbcount - *elen, pos); *offset = (bcount + *elen - lbcount) >> blocksize_bits; - return etype; + return 1; } diff --git a/fs/udf/partition.c b/fs/udf/partition.c index af877991edc1..2b85c9501bed 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -282,9 +282,11 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, sector_t ext_offset; struct extent_position epos = {}; uint32_t phyblock; + int8_t etype; + int err = 0; - if (inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset) != - (EXT_RECORDED_ALLOCATED >> 30)) + err = inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset, &etype); + if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) phyblock = 0xFFFFFFFF; else { map = &UDF_SB(sb)->s_partmaps[partition]; diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 399958f891d1..4f33a4a48886 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -214,10 +214,12 @@ int udf_truncate_extents(struct inode *inode) else BUG(); - etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + ret = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset, &etype); + if (ret < 0) + return ret; byte_offset = (offset << sb->s_blocksize_bits) + (inode->i_size & (sb->s_blocksize - 1)); - if (etype == -1) { + if (ret == 0) { /* We should extend the file? */ WARN_ON(byte_offset); return 0; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 5067ed68a8b4..d159f20d61e8 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -157,8 +157,9 @@ extern struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block, extern int udf_setsize(struct inode *, loff_t); extern void udf_evict_inode(struct inode *); extern int udf_write_inode(struct inode *, struct writeback_control *wbc); -extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, - struct kernel_lb_addr *, uint32_t *, sector_t *); +extern int inode_bmap(struct inode *inode, sector_t block, + struct extent_position *pos, struct kernel_lb_addr *eloc, + uint32_t *elen, sector_t *offset, int8_t *etype); int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, struct extent_position *epos); From 1ac49babc952f48d82676979b20885e480e69be8 Mon Sep 17 00:00:00 2001 From: Gianfranco Trad Date: Wed, 25 Sep 2024 09:46:15 +0200 Subject: [PATCH 123/207] udf: fix uninit-value use in udf_get_fileshortad [ Upstream commit 264db9d666ad9a35075cc9ed9ec09d021580fbb1 ] Check for overflow when computing alen in udf_current_aext to mitigate later uninit-value use in udf_get_fileshortad KMSAN bug[1]. After applying the patch reproducer did not trigger any issue[2]. [1] https://syzkaller.appspot.com/bug?extid=8901c4560b7ab5c2f9df [2] https://syzkaller.appspot.com/x/log.txt?x=10242227980000 Reported-by: syzbot+8901c4560b7ab5c2f9df@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8901c4560b7ab5c2f9df Tested-by: syzbot+8901c4560b7ab5c2f9df@syzkaller.appspotmail.com Suggested-by: Jan Kara Signed-off-by: Gianfranco Trad Signed-off-by: Jan Kara Link: https://patch.msgid.link/20240925074613.8475-3-gianf.trad@gmail.com Signed-off-by: Sasha Levin --- fs/udf/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 37fa27136faf..e98c198f85b9 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -2255,12 +2255,15 @@ int udf_current_aext(struct inode *inode, struct extent_position *epos, alen = udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc; } else { + struct allocExtDesc *header = + (struct allocExtDesc *)epos->bh->b_data; + if (!epos->offset) epos->offset = sizeof(struct allocExtDesc); ptr = epos->bh->b_data + epos->offset; - alen = sizeof(struct allocExtDesc) + - le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)-> - lengthAllocDescs); + if (check_add_overflow(sizeof(struct allocExtDesc), + le32_to_cpu(header->lengthAllocDescs), &alen)) + return -1; } switch (iinfo->i_alloc_type) { From f47a40ee4cdba6191eae14879105dc3c75c60fb2 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 2 Oct 2024 03:20:10 +0100 Subject: [PATCH 124/207] ASoC: qcom: sm8250: add qrb4210-rb2-sndcard compatible string [ Upstream commit b97bc0656a66f89f78098d4d72dc04fa9518ab11 ] Add "qcom,qrb4210-rb2-sndcard" to the list of recognizable devices. Signed-off-by: Alexey Klimov Link: https://patch.msgid.link/20241002022015.867031-3-alexey.klimov@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/sm8250.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c index 6558bf2e14e8..9eb8ae0196d9 100644 --- a/sound/soc/qcom/sm8250.c +++ b/sound/soc/qcom/sm8250.c @@ -153,6 +153,7 @@ static int sm8250_platform_probe(struct platform_device *pdev) static const struct of_device_id snd_sm8250_dt_match[] = { {.compatible = "qcom,sm8250-sndcard"}, + {.compatible = "qcom,qrb4210-rb2-sndcard"}, {.compatible = "qcom,qrb5165-rb5-sndcard"}, {} }; From 6d52db315cfc68c2ff59a853f4311809fe7e7e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 28 Sep 2024 23:59:47 +0200 Subject: [PATCH 125/207] cifs: Validate content of NFS reparse point buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 556ac52bb1e76cc28fd30aa117b42989965b3efd ] Symlink target location stored in DataBuffer is encoded in UTF-16. So check that symlink DataBuffer length is non-zero and even number. And check that DataBuffer does not contain UTF-16 null codepoint because Linux cannot process symlink with null byte. DataBuffer for char and block devices is 8 bytes long as it contains two 32-bit numbers (major and minor). Add check for this. DataBuffer buffer for sockets and fifos zero-length. Add checks for this. Signed-off-by: Pali Rohár Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/reparse.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index ad0e0de9a165..7429b96a6ae5 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -330,6 +330,18 @@ static int parse_reparse_posix(struct reparse_posix_data *buf, switch ((type = le64_to_cpu(buf->InodeType))) { case NFS_SPECFILE_LNK: + if (len == 0 || (len % 2)) { + cifs_dbg(VFS, "srv returned malformed nfs symlink buffer\n"); + return -EIO; + } + /* + * Check that buffer does not contain UTF-16 null codepoint + * because Linux cannot process symlink with null byte. + */ + if (UniStrnlen((wchar_t *)buf->DataBuffer, len/2) != len/2) { + cifs_dbg(VFS, "srv returned null byte in nfs symlink target location\n"); + return -EIO; + } data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer, len, true, cifs_sb->local_nls); @@ -340,8 +352,19 @@ static int parse_reparse_posix(struct reparse_posix_data *buf, break; case NFS_SPECFILE_CHR: case NFS_SPECFILE_BLK: + /* DataBuffer for block and char devices contains two 32-bit numbers */ + if (len != 8) { + cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type); + return -EIO; + } + break; case NFS_SPECFILE_FIFO: case NFS_SPECFILE_SOCK: + /* DataBuffer for fifos and sockets is empty */ + if (len != 0) { + cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type); + return -EIO; + } break; default: cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n", From 3d75f500f64de0a1cdb5648762cd689dc731e6fa Mon Sep 17 00:00:00 2001 From: Crag Wang Date: Fri, 4 Oct 2024 23:27:58 +0800 Subject: [PATCH 126/207] platform/x86: dell-sysman: add support for alienware products [ Upstream commit a561509b4187a8908eb7fbb2d1bf35bbc20ec74b ] Alienware supports firmware-attributes and has its own OEM string. Signed-off-by: Crag Wang Link: https://lore.kernel.org/r/20241004152826.93992-1-crag_wang@dell.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/dell/dell-wmi-sysman/sysman.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c index b929b4f82420..af49dd6b31ad 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c @@ -521,6 +521,7 @@ static int __init sysman_init(void) int ret = 0; if (!dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Alienware", NULL) && !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "www.dell.com", NULL)) { pr_err("Unable to run on non-Dell system\n"); return -ENODEV; From a94c197d4d749954dfaa37e907fcc8c04e4aad7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 21 Oct 2024 22:11:19 +0800 Subject: [PATCH 127/207] LoongArch: Don't crash in stack_top() for tasks without vDSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 134475a9ab8487527238d270639a8cb74c10aab2 ] Not all tasks have a vDSO mapped, for example kthreads never do. If such a task ever ends up calling stack_top(), it will derefence the NULL vdso pointer and crash. This can for example happen when using kunit: [<9000000000203874>] stack_top+0x58/0xa8 [<90000000002956cc>] arch_pick_mmap_layout+0x164/0x220 [<90000000003c284c>] kunit_vm_mmap_init+0x108/0x12c [<90000000003c1fbc>] __kunit_add_resource+0x38/0x8c [<90000000003c2704>] kunit_vm_mmap+0x88/0xc8 [<9000000000410b14>] usercopy_test_init+0xbc/0x25c [<90000000003c1db4>] kunit_try_run_case+0x5c/0x184 [<90000000003c3d54>] kunit_generic_run_threadfn_adapter+0x24/0x48 [<900000000022e4bc>] kthread+0xc8/0xd4 [<9000000000200ce8>] ret_from_kernel_thread+0xc/0xa4 Fixes: 803b0fc5c3f2 ("LoongArch: Add process management") Signed-off-by: Thomas Weißschuh Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/kernel/process.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index f2ff8b5d591e..6e58f65455c7 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -293,13 +293,15 @@ unsigned long stack_top(void) { unsigned long top = TASK_SIZE & PAGE_MASK; - /* Space for the VDSO & data page */ - top -= PAGE_ALIGN(current->thread.vdso->size); - top -= VVAR_SIZE; + if (current->thread.vdso) { + /* Space for the VDSO & data page */ + top -= PAGE_ALIGN(current->thread.vdso->size); + top -= VVAR_SIZE; - /* Space to randomize the VDSO base */ - if (current->flags & PF_RANDOMIZE) - top -= VDSO_RANDOMIZE_SIZE; + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + } return top; } From f475d8a0cca76da393c361932f5fd26cda75236b Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 22 Oct 2024 09:40:37 -0500 Subject: [PATCH 128/207] jfs: Fix sanity check in dbMount [ Upstream commit 67373ca8404fe57eb1bb4b57f314cff77ce54932 ] MAXAG is a legitimate value for bmp->db_numag Fixes: e63866a47556 ("jfs: fix out-of-bounds in dbNextAG() and diAlloc()") Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 974ecf5e0d95..3ab410059dc2 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -187,7 +187,7 @@ int dbMount(struct inode *ipbmap) } bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); - if (!bmp->db_numag || bmp->db_numag >= MAXAG) { + if (!bmp->db_numag || bmp->db_numag > MAXAG) { err = -EINVAL; goto err_release_metapage; } From 6bc24db74fe4788cc7c2f30a113fc6aafba225a3 Mon Sep 17 00:00:00 2001 From: Mikel Rychliski Date: Mon, 30 Sep 2024 16:26:54 -0400 Subject: [PATCH 129/207] tracing/probes: Fix MAX_TRACE_ARGS limit handling [ Upstream commit 73f35080477e893aa6f4c8d388352b871b288fbc ] When creating a trace_probe we would set nr_args prior to truncating the arguments to MAX_TRACE_ARGS. However, we would only initialize arguments up to the limit. This caused invalid memory access when attempting to set up probes with more than 128 fetchargs. BUG: kernel NULL pointer dereference, address: 0000000000000020 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 UID: 0 PID: 1769 Comm: cat Not tainted 6.11.0-rc7+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014 RIP: 0010:__set_print_fmt+0x134/0x330 Resolve the issue by applying the MAX_TRACE_ARGS limit earlier. Return an error when there are too many arguments instead of silently truncating. Link: https://lore.kernel.org/all/20240930202656.292869-1-mikel@mikelr.com/ Fixes: 035ba76014c0 ("tracing/probes: cleanup: Set trace_probe::nr_args at trace_probe_init") Signed-off-by: Mikel Rychliski Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_eprobe.c | 7 ++++++- kernel/trace/trace_fprobe.c | 6 +++++- kernel/trace/trace_kprobe.c | 6 +++++- kernel/trace/trace_uprobe.c | 4 +++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index b03bc30f85ee..31bb977670bd 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -915,6 +915,11 @@ static int __trace_eprobe_create(int argc, const char *argv[]) } } + if (argc - 2 > MAX_TRACE_ARGS) { + ret = -E2BIG; + goto error; + } + mutex_lock(&event_mutex); event_call = find_and_get_event(sys_name, sys_event); ep = alloc_event_probe(group, event, event_call, argc - 2); @@ -940,7 +945,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) argc -= 2; argv += 2; /* parse arguments */ - for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + for (i = 0; i < argc; i++) { trace_probe_log_set_index(i + 2); ret = trace_eprobe_tp_update_arg(ep, argv, i); if (ret) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 4f4280815522..f26bb8a90cb5 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1103,6 +1103,10 @@ static int __trace_fprobe_create(int argc, const char *argv[]) argc = new_argc; argv = new_argv; } + if (argc > MAX_TRACE_ARGS) { + ret = -E2BIG; + goto out; + } /* setup a probe */ tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive, @@ -1119,7 +1123,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) (unsigned long)tf->tpoint->probestub); /* parse arguments */ - for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + for (i = 0; i < argc; i++) { trace_probe_log_set_index(i + 2); ctx.offset = 0; ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 32c617123f37..12d997bb3e78 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -932,6 +932,10 @@ static int __trace_kprobe_create(int argc, const char *argv[]) argc = new_argc; argv = new_argv; } + if (argc > MAX_TRACE_ARGS) { + ret = -E2BIG; + goto out; + } /* setup a probe */ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, @@ -944,7 +948,7 @@ static int __trace_kprobe_create(int argc, const char *argv[]) } /* parse arguments */ - for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + for (i = 0; i < argc; i++) { trace_probe_log_set_index(i + 2); ctx.offset = 0; ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], &ctx); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d72ac9dde532..3e7d92d2650b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -556,6 +556,8 @@ static int __trace_uprobe_create(int argc, const char **argv) if (argc < 2) return -ECANCELED; + if (argc - 2 > MAX_TRACE_ARGS) + return -E2BIG; if (argv[0][1] == ':') event = &argv[0][2]; @@ -681,7 +683,7 @@ static int __trace_uprobe_create(int argc, const char **argv) tu->filename = filename; /* parse arguments */ - for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + for (i = 0; i < argc; i++) { struct traceprobe_parse_context ctx = { .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER, }; From a14a075a14af8d622c576145455702591bdde09d Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 7 Oct 2024 15:47:24 +0100 Subject: [PATCH 130/207] tracing: Consider the NULL character when validating the event length [ Upstream commit 0b6e2e22cb23105fcb171ab92f0f7516c69c8471 ] strlen() returns a string length excluding the null byte. If the string length equals to the maximum buffer length, the buffer will have no space for the NULL terminating character. This commit checks this condition and returns failure for it. Link: https://lore.kernel.org/all/20241007144724.920954-1-leo.yan@arm.com/ Fixes: dec65d79fd26 ("tracing/probe: Check event name length correctly") Signed-off-by: Leo Yan Reviewed-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 58a6275c7f49..a1bc49de648f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -275,7 +275,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, } trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; - } else if (len > MAX_EVENT_NAME_LEN) { + } else if (len >= MAX_EVENT_NAME_LEN) { trace_probe_log_err(offset, EVENT_TOO_LONG); return -EINVAL; } From ac1d820eaa309c7cd80f151084b8dab15a5f2034 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 2 Sep 2024 17:07:09 -0700 Subject: [PATCH 131/207] xfrm: extract dst lookup parameters into a struct [ Upstream commit e509996b16728e37d5a909a5c63c1bd64f23b306 ] Preparation for adding more fields to dst lookup functions without changing their signatures. Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert Stable-dep-of: b84697210343 ("xfrm: respect ip protocols rules criteria when performing dst lookups") Signed-off-by: Sasha Levin --- include/net/xfrm.h | 26 +++++++++++++------------- net/ipv4/xfrm4_policy.c | 36 +++++++++++++++--------------------- net/ipv6/xfrm6_policy.c | 28 +++++++++++++--------------- net/xfrm/xfrm_device.c | 11 ++++++++--- net/xfrm/xfrm_policy.c | 35 +++++++++++++++++++++++------------ 5 files changed, 72 insertions(+), 64 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b280e7c46011..93207d87e1c7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -342,20 +342,23 @@ struct xfrm_if_cb { void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); void xfrm_if_unregister_cb(void); +struct xfrm_dst_lookup_params { + struct net *net; + int tos; + int oif; + xfrm_address_t *saddr; + xfrm_address_t *daddr; + u32 mark; +}; + struct net_device; struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { struct dst_ops *dst_ops; - struct dst_entry *(*dst_lookup)(struct net *net, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark); - int (*get_saddr)(struct net *net, int oif, - xfrm_address_t *saddr, - xfrm_address_t *daddr, - u32 mark); + struct dst_entry *(*dst_lookup)(const struct xfrm_dst_lookup_params *params); + int (*get_saddr)(xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); @@ -1728,10 +1731,7 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, } #endif -struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family, u32 mark); +struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params); struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c33bca2c3841..01d4f6f4dbb8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,47 +17,41 @@ #include #include -static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, + const struct xfrm_dst_lookup_params *params) { struct rtable *rt; memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = daddr->a4; - fl4->flowi4_tos = tos; - fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl4->flowi4_mark = mark; - if (saddr) - fl4->saddr = saddr->a4; + fl4->daddr = params->daddr->a4; + fl4->flowi4_tos = params->tos; + fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net, + params->oif); + fl4->flowi4_mark = params->mark; + if (params->saddr) + fl4->saddr = params->saddr->a4; - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key(params->net, fl4); if (!IS_ERR(rt)) return &rt->dst; return ERR_CAST(rt); } -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); + return __xfrm4_dst_lookup(&fl4, params); } -static int xfrm4_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr, - u32 mark) +static int xfrm4_get_saddr(xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); + dst = __xfrm4_dst_lookup(&fl4, params); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 444b0b4469a4..246a0cea77c2 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -23,23 +23,21 @@ #include #include -static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params) { struct flowi6 fl6; struct dst_entry *dst; int err; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl6.flowi6_mark = mark; - memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); - if (saddr) - memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); + fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net, + params->oif); + fl6.flowi6_mark = params->mark; + memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr)); + if (params->saddr) + memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); - dst = ip6_route_output(net, NULL, &fl6); + dst = ip6_route_output(params->net, NULL, &fl6); err = dst->error; if (dst->error) { @@ -50,15 +48,14 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, return dst; } -static int xfrm6_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr, - u32 mark) +static int xfrm6_get_saddr(xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params) { struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; - dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); + dst = xfrm6_dst_lookup(params); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -68,7 +65,8 @@ static int xfrm6_get_saddr(struct net *net, int oif, return -EHOSTUNREACH; } dev = idev->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); + ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, + &saddr->in6); dst_release(dst); return 0; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6346690d5c69..04dc0c8a8370 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -263,6 +263,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { + struct xfrm_dst_lookup_params params; + if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { saddr = &x->props.saddr; daddr = &x->id.daddr; @@ -271,9 +273,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, daddr = &x->props.saddr; } - dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, - x->props.family, - xfrm_smark_get(0, x)); + memset(¶ms, 0, sizeof(params)); + params.net = net; + params.saddr = saddr; + params.daddr = daddr; + params.mark = xfrm_smark_get(0, x); + dst = __xfrm_dst_lookup(x->props.family, ¶ms); if (IS_ERR(dst)) return (is_packet_offload) ? -EINVAL : 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b699cc2ec35a..1395d3de1ec7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -251,10 +251,8 @@ static const struct xfrm_if_cb *xfrm_if_get_cb(void) return rcu_dereference(xfrm_if_cb); } -struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family, u32 mark) +struct dst_entry *__xfrm_dst_lookup(int family, + const struct xfrm_dst_lookup_params *params) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -263,7 +261,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark); + dst = afinfo->dst_lookup(params); rcu_read_unlock(); @@ -277,6 +275,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, xfrm_address_t *prev_daddr, int family, u32 mark) { + struct xfrm_dst_lookup_params params; struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; @@ -291,7 +290,14 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark); + params.net = net; + params.saddr = saddr; + params.daddr = daddr; + params.tos = tos; + params.oif = oif; + params.mark = mark; + + dst = __xfrm_dst_lookup(family, ¶ms); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -2424,15 +2430,15 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) } static int -xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, - xfrm_address_t *remote, unsigned short family, u32 mark) +xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params) { int err; const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, oif, local, remote, mark); + err = afinfo->get_saddr(saddr, params); rcu_read_unlock(); return err; } @@ -2461,9 +2467,14 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, - &tmp, remote, - tmpl->encap_family, 0); + struct xfrm_dst_lookup_params params; + + memset(¶ms, 0, sizeof(params)); + params.net = net; + params.oif = fl->flowi_oif; + params.daddr = remote; + error = xfrm_get_saddr(tmpl->encap_family, &tmp, + ¶ms); if (error) goto fail; local = &tmp; From c1530660410cf3c4da7b66e0c9ee7117ef9f6c42 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 2 Sep 2024 17:07:10 -0700 Subject: [PATCH 132/207] xfrm: respect ip protocols rules criteria when performing dst lookups [ Upstream commit b8469721034300bbb6dec5b4bf32492c95e16a0c ] The series in the "fixes" tag added the ability to consider L4 attributes in routing rules. The dst lookup on the outer packet of encapsulated traffic in the xfrm code was not adapted to this change, thus routing behavior that relies on L4 information is not respected. Pass the ip protocol information when performing dst lookups. Fixes: a25724b05af0 ("Merge branch 'fib_rules-support-sport-dport-and-proto-match'") Signed-off-by: Eyal Birger Tested-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_policy.c | 2 ++ net/ipv6/xfrm6_policy.c | 3 +++ net/xfrm/xfrm_policy.c | 15 +++++++++++++++ 4 files changed, 22 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 93207d87e1c7..93a9866ee481 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -349,6 +349,8 @@ struct xfrm_dst_lookup_params { xfrm_address_t *saddr; xfrm_address_t *daddr; u32 mark; + __u8 ipproto; + union flowi_uli uli; }; struct net_device; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 01d4f6f4dbb8..63bbc7f6a5a8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -30,6 +30,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, fl4->flowi4_mark = params->mark; if (params->saddr) fl4->saddr = params->saddr->a4; + fl4->flowi4_proto = params->ipproto; + fl4->uli = params->uli; rt = __ip_route_output_key(params->net, fl4); if (!IS_ERR(rt)) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 246a0cea77c2..4cd625af91e6 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -37,6 +37,9 @@ static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *p if (params->saddr) memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); + fl6.flowi4_proto = params->ipproto; + fl6.uli = params->uli; + dst = ip6_route_output(params->net, NULL, &fl6); err = dst->error; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1395d3de1ec7..d788baffbf10 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -296,6 +296,21 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, params.tos = tos; params.oif = oif; params.mark = mark; + params.ipproto = x->id.proto; + if (x->encap) { + switch (x->encap->encap_type) { + case UDP_ENCAP_ESPINUDP: + params.ipproto = IPPROTO_UDP; + params.uli.ports.sport = x->encap->encap_sport; + params.uli.ports.dport = x->encap->encap_dport; + break; + case TCP_ENCAP_ESPINTCP: + params.ipproto = IPPROTO_TCP; + params.uli.ports.sport = x->encap->encap_sport; + params.uli.ports.dport = x->encap->encap_dport; + break; + } + } dst = __xfrm_dst_lookup(family, ¶ms); From f41bd93b3e0508edc7ba820357f949071dcc0acc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 10 Oct 2024 18:34:05 +0200 Subject: [PATCH 133/207] netfilter: bpf: must hold reference on net namespace [ Upstream commit 1230fe7ad3974f7bf6c78901473e039b34d4fb1f ] BUG: KASAN: slab-use-after-free in __nf_unregister_net_hook+0x640/0x6b0 Read of size 8 at addr ffff8880106fe400 by task repro/72= bpf_nf_link_release+0xda/0x1e0 bpf_link_free+0x139/0x2d0 bpf_link_release+0x68/0x80 __fput+0x414/0xb60 Eric says: It seems that bpf was able to defer the __nf_unregister_net_hook() after exit()/close() time. Perhaps a netns reference is missing, because the netns has been dismantled/freed already. bpf_nf_link_attach() does : link->net = net; But I do not see a reference being taken on net. Add such a reference and release it after hook unreg. Note that I was unable to get syzbot reproducer to work, so I do not know if this resolves this splat. Fixes: 84601d6ee68a ("bpf: add bpf_link support for BPF_NETFILTER programs") Diagnosed-by: Eric Dumazet Reported-by: Lai, Yi Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_bpf_link.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 96015a59db09..2aad0562a413 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -23,6 +23,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, struct bpf_nf_link { struct bpf_link link; struct nf_hook_ops hook_ops; + netns_tracker ns_tracker; struct net *net; u32 dead; const struct nf_defrag_hook *defrag_hook; @@ -120,6 +121,7 @@ static void bpf_nf_link_release(struct bpf_link *link) if (!cmpxchg(&nf_link->dead, 0, 1)) { nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops); bpf_nf_disable_defrag(nf_link); + put_net_track(nf_link->net, &nf_link->ns_tracker); } } @@ -258,6 +260,8 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } + get_net_track(net, &link->ns_tracker, GFP_KERNEL); + return bpf_link_settle(&link_primer); } From 6dc937a3086e344f965ca5c459f8f3eb6b68d890 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 15 Oct 2024 22:41:48 +0800 Subject: [PATCH 134/207] net/sun3_82586: fix potential memory leak in sun3_82586_send_packet() [ Upstream commit 2cb3f56e827abb22c4168ad0c1bbbf401bb2f3b8 ] The sun3_82586_send_packet() returns NETDEV_TX_OK without freeing skb in case of skb->len being too long, add dev_kfree_skb() to fix it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Wang Hai Reviewed-by: Simon Horman Message-ID: <20241015144148.7918-1-wanghai38@huawei.com> Signed-off-by: Andrew Lunn Signed-off-by: Sasha Levin --- drivers/net/ethernet/i825xx/sun3_82586.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index f2d4669c81cf..58a3d28d938c 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -1012,6 +1012,7 @@ sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev) if(skb->len > XMIT_BUFF_SIZE) { printk("%s: Sorry, max. framelength is %d bytes. The length of your frame is %d bytes.\n",dev->name,XMIT_BUFF_SIZE,skb->len); + dev_kfree_skb(skb); return NETDEV_TX_OK; } From 641c1beed52bf3c6deb0193fe4d38ec9ff75d2ae Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 15 Oct 2024 22:48:02 +0800 Subject: [PATCH 135/207] be2net: fix potential memory leak in be_xmit() [ Upstream commit e4dd8bfe0f6a23acd305f9b892c00899089bd621 ] The be_xmit() returns NETDEV_TX_OK without freeing skb in case of be_xmit_enqueue() fails, add dev_kfree_skb_any() to fix it. Fixes: 760c295e0e8d ("be2net: Support for OS2BMC.") Signed-off-by: Wang Hai Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Message-ID: <20241015144802.12150-1-wanghai38@huawei.com> Signed-off-by: Andrew Lunn Signed-off-by: Sasha Levin --- drivers/net/ethernet/emulex/benet/be_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ad862ed7888a..9d425ece33fd 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1381,10 +1381,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) be_get_wrb_params_from_skb(adapter, skb, &wrb_params); wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); - if (unlikely(!wrb_cnt)) { - dev_kfree_skb_any(skb); - goto drop; - } + if (unlikely(!wrb_cnt)) + goto drop_skb; /* if os2bmc is enabled and if the pkt is destined to bmc, * enqueue the pkt a 2nd time with mgmt bit set. @@ -1393,7 +1391,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) BE_WRB_F_SET(wrb_params.features, OS2BMC, 1); wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); if (unlikely(!wrb_cnt)) - goto drop; + goto drop_skb; else skb_get(skb); } @@ -1407,6 +1405,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) be_xmit_flush(adapter, txo); return NETDEV_TX_OK; +drop_skb: + dev_kfree_skb_any(skb); drop: tx_stats(txo)->tx_drv_drops++; /* Flush the already enqueued tx requests */ From 64854b218703d1bcb288406a43f3a2210a85abfe Mon Sep 17 00:00:00 2001 From: Jakub Boehm Date: Tue, 15 Oct 2024 17:16:04 +0200 Subject: [PATCH 136/207] net: plip: fix break; causing plip to never transmit [ Upstream commit f99cf996ba5a315f8b9f13cc21dff0604a0eb749 ] Since commit 71ae2cb30531 ("net: plip: Fix fall-through warnings for Clang") plip was not able to send any packets, this patch replaces one unintended break; with fallthrough; which was originally missed by commit 9525d69a3667 ("net: plip: mark expected switch fall-throughs"). I have verified with a real hardware PLIP connection that everything works once again after applying this patch. Fixes: 71ae2cb30531 ("net: plip: Fix fall-through warnings for Clang") Signed-off-by: Jakub Boehm Reviewed-by: Simon Horman Message-ID: <20241015-net-plip-tx-fix-v1-1-32d8be1c7e0b@gmail.com> Signed-off-by: Andrew Lunn Signed-off-by: Sasha Levin --- drivers/net/plip/plip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 40ce8abe6999..6019811920a4 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -815,7 +815,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, return HS_TIMEOUT; } } - break; + fallthrough; case PLIP_PK_LENGTH_LSB: if (plip_send(nibble_timeout, dev, From e71146ff378c7cae47e71c4eb95f2a63682dcf81 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Thu, 17 Oct 2024 13:06:50 +0300 Subject: [PATCH 137/207] octeon_ep: Implement helper for iterating packets in Rx queue [ Upstream commit bd28df26197b2bd0913bf1b36770836481975143 ] The common code with some packet and index manipulations is extracted and moved to newly implemented helper to make the code more readable and avoid duplication. This is a preparation for skb allocation failure handling. Found by Linux Verification Center (linuxtesting.org) with SVACE. Suggested-by: Simon Horman Suggested-by: Paolo Abeni Signed-off-by: Aleksandr Mishin Reviewed-by: Jacob Keller Signed-off-by: Andrew Lunn Stable-dep-of: eb592008f79b ("octeon_ep: Add SKB allocation failures handling in __octep_oq_process_rx()") Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeon_ep/octep_rx.c | 55 +++++++++++-------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index 3c43f8078528..cfbfad95b021 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -336,6 +336,30 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, return new_pkts; } +/** + * octep_oq_next_pkt() - Move to the next packet in Rx queue. + * + * @oq: Octeon Rx queue data structure. + * @buff_info: Current packet buffer info. + * @read_idx: Current packet index in the ring. + * @desc_used: Current packet descriptor number. + * + * Free the resources associated with a packet. + * Increment packet index in the ring and packet descriptor number. + */ +static void octep_oq_next_pkt(struct octep_oq *oq, + struct octep_rx_buffer *buff_info, + u32 *read_idx, u32 *desc_used) +{ + dma_unmap_page(oq->dev, oq->desc_ring[*read_idx].buffer_ptr, + PAGE_SIZE, DMA_FROM_DEVICE); + buff_info->page = NULL; + (*read_idx)++; + (*desc_used)++; + if (*read_idx == oq->max_count) + *read_idx = 0; +} + /** * __octep_oq_process_rx() - Process hardware Rx queue and push to stack. * @@ -365,10 +389,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { buff_info = (struct octep_rx_buffer *)&oq->buff_info[read_idx]; - dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, - PAGE_SIZE, DMA_FROM_DEVICE); resp_hw = page_address(buff_info->page); - buff_info->page = NULL; /* Swap the length field that is in Big-Endian to CPU */ buff_info->len = be64_to_cpu(resp_hw->length); @@ -390,36 +411,27 @@ static int __octep_oq_process_rx(struct octep_device *oct, */ data_offset = OCTEP_OQ_RESP_HW_SIZE; } + + octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used); + + skb = build_skb((void *)resp_hw, PAGE_SIZE); + skb_reserve(skb, data_offset); + rx_bytes += buff_info->len; if (buff_info->len <= oq->max_single_buffer_size) { - skb = build_skb((void *)resp_hw, PAGE_SIZE); - skb_reserve(skb, data_offset); skb_put(skb, buff_info->len); - read_idx++; - desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; } else { struct skb_shared_info *shinfo; u16 data_len; - skb = build_skb((void *)resp_hw, PAGE_SIZE); - skb_reserve(skb, data_offset); /* Head fragment includes response header(s); * subsequent fragments contains only data. */ skb_put(skb, oq->max_single_buffer_size); - read_idx++; - desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; - shinfo = skb_shinfo(skb); data_len = buff_info->len - oq->max_single_buffer_size; while (data_len) { - dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, - PAGE_SIZE, DMA_FROM_DEVICE); buff_info = (struct octep_rx_buffer *) &oq->buff_info[read_idx]; if (data_len < oq->buffer_size) { @@ -434,11 +446,8 @@ static int __octep_oq_process_rx(struct octep_device *oct, buff_info->page, 0, buff_info->len, buff_info->len); - buff_info->page = NULL; - read_idx++; - desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; + + octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used); } } From c2d2dc4f88bb3cfc4f3cc320fd3ff51b0ae5b0ea Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Thu, 17 Oct 2024 13:06:51 +0300 Subject: [PATCH 138/207] octeon_ep: Add SKB allocation failures handling in __octep_oq_process_rx() [ Upstream commit eb592008f79be52ccef88cd9a5249b3fc0367278 ] build_skb() returns NULL in case of a memory allocation failure so handle it inside __octep_oq_process_rx() to avoid NULL pointer dereference. __octep_oq_process_rx() is called during NAPI polling by the driver. If skb allocation fails, keep on pulling packets out of the Rx DMA queue: we shouldn't break the polling immediately and thus falsely indicate to the octep_napi_poll() that the Rx pressure is going down. As there is no associated skb in this case, don't process the packets and don't push them up the network stack - they are skipped. Helper function is implemented to unmmap/flush all the fragment buffers used by the dropped packet. 'alloc_failures' counter is incremented to mark the skb allocation error in driver statistics. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") Suggested-by: Paolo Abeni Signed-off-by: Aleksandr Mishin Reviewed-by: Jacob Keller Signed-off-by: Andrew Lunn Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeon_ep/octep_rx.c | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index cfbfad95b021..c7f4e3c058b7 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -360,6 +360,27 @@ static void octep_oq_next_pkt(struct octep_oq *oq, *read_idx = 0; } +/** + * octep_oq_drop_rx() - Free the resources associated with a packet. + * + * @oq: Octeon Rx queue data structure. + * @buff_info: Current packet buffer info. + * @read_idx: Current packet index in the ring. + * @desc_used: Current packet descriptor number. + * + */ +static void octep_oq_drop_rx(struct octep_oq *oq, + struct octep_rx_buffer *buff_info, + u32 *read_idx, u32 *desc_used) +{ + int data_len = buff_info->len - oq->max_single_buffer_size; + + while (data_len > 0) { + octep_oq_next_pkt(oq, buff_info, read_idx, desc_used); + data_len -= oq->buffer_size; + }; +} + /** * __octep_oq_process_rx() - Process hardware Rx queue and push to stack. * @@ -415,6 +436,12 @@ static int __octep_oq_process_rx(struct octep_device *oct, octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used); skb = build_skb((void *)resp_hw, PAGE_SIZE); + if (!skb) { + octep_oq_drop_rx(oq, buff_info, + &read_idx, &desc_used); + oq->stats.alloc_failures++; + continue; + } skb_reserve(skb, data_offset); rx_bytes += buff_info->len; From 06b526846cc9e21232ae989bf89bd37b3adf9044 Mon Sep 17 00:00:00 2001 From: Peter Rashleigh Date: Tue, 15 Oct 2024 21:08:22 -0700 Subject: [PATCH 139/207] net: dsa: mv88e6xxx: Fix error when setting port policy on mv88e6393x [ Upstream commit 12bc14949c4a7272b509af0f1022a0deeb215fd8 ] mv88e6393x_port_set_policy doesn't correctly shift the ptr value when converting the policy format between the old and new styles, so the target register ends up with the ptr being written over the data bits. Shift the pointer to align with the format expected by mv88e6393x_port_policy_write(). Fixes: 6584b26020fc ("net: dsa: mv88e6xxx: implement .port_set_policy for Amethyst") Signed-off-by: Peter Rashleigh Reviewed-by: Simon Horman Message-ID: <20241016040822.3917-1-peter@rashleigh.ca> Signed-off-by: Andrew Lunn Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/port.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 5394a8cf7bf1..04053fdc6489 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -1713,6 +1713,7 @@ int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port, ptr = shift / 8; shift %= 8; mask >>= ptr * 8; + ptr <<= 8; err = mv88e6393x_port_policy_read(chip, port, ptr, ®); if (err) From 275bebf5be504f41bc7f952a2caf6a172fc5112a Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 15 Oct 2024 09:01:21 +0300 Subject: [PATCH 140/207] fsl/fman: Save device references taken in mac_probe() [ Upstream commit efeddd552ec6767e4c8884caa516ac80b65f8823 ] In mac_probe() there are calls to of_find_device_by_node() which takes references to of_dev->dev. These references are not saved and not released later on error path in mac_probe() and in mac_remove(). Add new fields into mac_device structure to save references taken for future use in mac_probe() and mac_remove(). This is a preparation for further reference leaks fix. Signed-off-by: Aleksandr Mishin Signed-off-by: Paolo Abeni Stable-dep-of: 1dec67e0d9fb ("fsl/fman: Fix refcount handling of fman-related devices") Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/mac.c | 6 ++++-- drivers/net/ethernet/freescale/fman/mac.h | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 9767586b4eb3..9b863db0bf08 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -197,6 +197,7 @@ static int mac_probe(struct platform_device *_of_dev) err = -EINVAL; goto _return_of_node_put; } + mac_dev->fman_dev = &of_dev->dev; /* Get the FMan cell-index */ err = of_property_read_u32(dev_node, "cell-index", &val); @@ -208,7 +209,7 @@ static int mac_probe(struct platform_device *_of_dev) /* cell-index 0 => FMan id 1 */ fman_id = (u8)(val + 1); - priv->fman = fman_bind(&of_dev->dev); + priv->fman = fman_bind(mac_dev->fman_dev); if (!priv->fman) { dev_err(dev, "fman_bind(%pOF) failed\n", dev_node); err = -ENODEV; @@ -284,8 +285,9 @@ static int mac_probe(struct platform_device *_of_dev) err = -EINVAL; goto _return_of_node_put; } + mac_dev->fman_port_devs[i] = &of_dev->dev; - mac_dev->port[i] = fman_port_bind(&of_dev->dev); + mac_dev->port[i] = fman_port_bind(mac_dev->fman_port_devs[i]); if (!mac_dev->port[i]) { dev_err(dev, "dev_get_drvdata(%pOF) failed\n", dev_node); diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index fe747915cc73..8b5b43d50f8e 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -19,12 +19,13 @@ struct fman_mac; struct mac_priv_s; +#define PORT_NUM 2 struct mac_device { void __iomem *vaddr; struct device *dev; struct resource *res; u8 addr[ETH_ALEN]; - struct fman_port *port[2]; + struct fman_port *port[PORT_NUM]; struct phylink *phylink; struct phylink_config phylink_config; phy_interface_t phy_if; @@ -52,6 +53,9 @@ struct mac_device { struct fman_mac *fman_mac; struct mac_priv_s *priv; + + struct device *fman_dev; + struct device *fman_port_devs[PORT_NUM]; }; static inline struct mac_device From 5ed4334fc9512f934fe2ae9c4cf7f8142e451b8b Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 15 Oct 2024 09:01:22 +0300 Subject: [PATCH 141/207] fsl/fman: Fix refcount handling of fman-related devices [ Upstream commit 1dec67e0d9fbb087c2ab17bf1bd17208231c3bb1 ] In mac_probe() there are multiple calls to of_find_device_by_node(), fman_bind() and fman_port_bind() which takes references to of_dev->dev. Not all references taken by these calls are released later on error path in mac_probe() and in mac_remove() which lead to reference leaks. Add references release. Fixes: 3933961682a3 ("fsl/fman: Add FMan MAC driver") Signed-off-by: Aleksandr Mishin Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/mac.c | 62 +++++++++++++++++------ 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 9b863db0bf08..11da139082e1 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -204,7 +204,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err) { dev_err(dev, "failed to read cell-index for %pOF\n", dev_node); err = -EINVAL; - goto _return_of_node_put; + goto _return_dev_put; } /* cell-index 0 => FMan id 1 */ fman_id = (u8)(val + 1); @@ -213,40 +213,51 @@ static int mac_probe(struct platform_device *_of_dev) if (!priv->fman) { dev_err(dev, "fman_bind(%pOF) failed\n", dev_node); err = -ENODEV; - goto _return_of_node_put; + goto _return_dev_put; } + /* Two references have been taken in of_find_device_by_node() + * and fman_bind(). Release one of them here. The second one + * will be released in mac_remove(). + */ + put_device(mac_dev->fman_dev); of_node_put(dev_node); + dev_node = NULL; /* Get the address of the memory mapped registers */ mac_dev->res = platform_get_mem_or_io(_of_dev, 0); if (!mac_dev->res) { dev_err(dev, "could not get registers\n"); - return -EINVAL; + err = -EINVAL; + goto _return_dev_put; } err = devm_request_resource(dev, fman_get_mem_region(priv->fman), mac_dev->res); if (err) { dev_err_probe(dev, err, "could not request resource\n"); - return err; + goto _return_dev_put; } mac_dev->vaddr = devm_ioremap(dev, mac_dev->res->start, resource_size(mac_dev->res)); if (!mac_dev->vaddr) { dev_err(dev, "devm_ioremap() failed\n"); - return -EIO; + err = -EIO; + goto _return_dev_put; } - if (!of_device_is_available(mac_node)) - return -ENODEV; + if (!of_device_is_available(mac_node)) { + err = -ENODEV; + goto _return_dev_put; + } /* Get the cell-index */ err = of_property_read_u32(mac_node, "cell-index", &val); if (err) { dev_err(dev, "failed to read cell-index for %pOF\n", mac_node); - return -EINVAL; + err = -EINVAL; + goto _return_dev_put; } priv->cell_index = (u8)val; @@ -260,22 +271,26 @@ static int mac_probe(struct platform_device *_of_dev) if (unlikely(nph < 0)) { dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n", mac_node); - return nph; + err = nph; + goto _return_dev_put; } if (nph != ARRAY_SIZE(mac_dev->port)) { dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n", mac_node); - return -EINVAL; + err = -EINVAL; + goto _return_dev_put; } - for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { + /* PORT_NUM determines the size of the port array */ + for (i = 0; i < PORT_NUM; i++) { /* Find the port node */ dev_node = of_parse_phandle(mac_node, "fsl,fman-ports", i); if (!dev_node) { dev_err(dev, "of_parse_phandle(%pOF, fsl,fman-ports) failed\n", mac_node); - return -EINVAL; + err = -EINVAL; + goto _return_dev_arr_put; } of_dev = of_find_device_by_node(dev_node); @@ -283,7 +298,7 @@ static int mac_probe(struct platform_device *_of_dev) dev_err(dev, "of_find_device_by_node(%pOF) failed\n", dev_node); err = -EINVAL; - goto _return_of_node_put; + goto _return_dev_arr_put; } mac_dev->fman_port_devs[i] = &of_dev->dev; @@ -292,9 +307,15 @@ static int mac_probe(struct platform_device *_of_dev) dev_err(dev, "dev_get_drvdata(%pOF) failed\n", dev_node); err = -EINVAL; - goto _return_of_node_put; + goto _return_dev_arr_put; } + /* Two references have been taken in of_find_device_by_node() + * and fman_port_bind(). Release one of them here. The second + * one will be released in mac_remove(). + */ + put_device(mac_dev->fman_port_devs[i]); of_node_put(dev_node); + dev_node = NULL; } /* Get the PHY connection type */ @@ -314,7 +335,7 @@ static int mac_probe(struct platform_device *_of_dev) err = init(mac_dev, mac_node, ¶ms); if (err < 0) - return err; + goto _return_dev_arr_put; if (!is_zero_ether_addr(mac_dev->addr)) dev_info(dev, "FMan MAC address: %pM\n", mac_dev->addr); @@ -329,6 +350,12 @@ static int mac_probe(struct platform_device *_of_dev) return err; +_return_dev_arr_put: + /* mac_dev is kzalloc'ed */ + for (i = 0; i < PORT_NUM; i++) + put_device(mac_dev->fman_port_devs[i]); +_return_dev_put: + put_device(mac_dev->fman_dev); _return_of_node_put: of_node_put(dev_node); return err; @@ -337,6 +364,11 @@ _return_of_node_put: static void mac_remove(struct platform_device *pdev) { struct mac_device *mac_dev = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < PORT_NUM; i++) + put_device(mac_dev->fman_port_devs[i]); + put_device(mac_dev->fman_dev); platform_device_unregister(mac_dev->priv->eth_dev); } From 433742ba96baf30c21e654ce3e698ad87100593b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 20 Oct 2024 14:49:51 +0200 Subject: [PATCH 142/207] netfilter: xtables: fix typo causing some targets not to load on IPv6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 306ed1728e8438caed30332e1ab46b28c25fe3d8 ] - There is no NFPROTO_IPV6 family for mark and NFLOG. - TRACE is also missing module autoload with NFPROTO_IPV6. This results in ip6tables failing to restore a ruleset. This issue has been reported by several users providing incomplete patches. Very similar to Ilya Katsnelson's patch including a missing chunk in the TRACE extension. Fixes: 0bfcb7b71e73 ("netfilter: xtables: avoid NFPROTO_UNSPEC where needed") Reported-by: Ignat Korchagin Reported-by: Ilya Katsnelson Reported-by: Krzysztof Olędzki Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_NFLOG.c | 2 +- net/netfilter/xt_TRACE.c | 1 + net/netfilter/xt_mark.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index d80abd6ccaf8..6dcf4bc7e30b 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -79,7 +79,7 @@ static struct xt_target nflog_tg_reg[] __read_mostly = { { .name = "NFLOG", .revision = 0, - .family = NFPROTO_IPV4, + .family = NFPROTO_IPV6, .checkentry = nflog_tg_check, .destroy = nflog_tg_destroy, .target = nflog_tg, diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index f3fa4f11348c..a642ff09fc8e 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -49,6 +49,7 @@ static struct xt_target trace_tg_reg[] __read_mostly = { .target = trace_tg, .checkentry = trace_tg_check, .destroy = trace_tg_destroy, + .me = THIS_MODULE, }, #endif }; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index f76fe04fc9a4..65b965ca40ea 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -62,7 +62,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = { { .name = "MARK", .revision = 2, - .family = NFPROTO_IPV4, + .family = NFPROTO_IPV6, .target = mark_tg, .targetsize = sizeof(struct xt_mark_tginfo2), .me = THIS_MODULE, From 69076f8435c1c5dae5f814eaf4c361d1f00b22a3 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 15 Oct 2024 21:16:21 +0800 Subject: [PATCH 143/207] net: wwan: fix global oob in wwan_rtnl_policy [ Upstream commit 47dd5447cab8ce30a847a0337d5341ae4c7476a7 ] The variable wwan_rtnl_link_ops assign a *bigger* maxtype which leads to a global out-of-bounds read when parsing the netlink attributes. Exactly same bug cause as the oob fixed in commit b33fb5b801c6 ("net: qualcomm: rmnet: fix global oob in rmnet_policy"). ================================================================== BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:388 [inline] BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x19d7/0x29a0 lib/nlattr.c:603 Read of size 1 at addr ffffffff8b09cb60 by task syz.1.66276/323862 CPU: 0 PID: 323862 Comm: syz.1.66276 Not tainted 6.1.70 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x177/0x231 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x14f/0x750 mm/kasan/report.c:395 kasan_report+0x139/0x170 mm/kasan/report.c:495 validate_nla lib/nlattr.c:388 [inline] __nla_validate_parse+0x19d7/0x29a0 lib/nlattr.c:603 __nla_parse+0x3c/0x50 lib/nlattr.c:700 nla_parse_nested_deprecated include/net/netlink.h:1269 [inline] __rtnl_newlink net/core/rtnetlink.c:3514 [inline] rtnl_newlink+0x7bc/0x1fd0 net/core/rtnetlink.c:3623 rtnetlink_rcv_msg+0x794/0xef0 net/core/rtnetlink.c:6122 netlink_rcv_skb+0x1de/0x420 net/netlink/af_netlink.c:2508 netlink_unicast_kernel net/netlink/af_netlink.c:1326 [inline] netlink_unicast+0x74b/0x8c0 net/netlink/af_netlink.c:1352 netlink_sendmsg+0x882/0xb90 net/netlink/af_netlink.c:1874 sock_sendmsg_nosec net/socket.c:716 [inline] __sock_sendmsg net/socket.c:728 [inline] ____sys_sendmsg+0x5cc/0x8f0 net/socket.c:2499 ___sys_sendmsg+0x21c/0x290 net/socket.c:2553 __sys_sendmsg net/socket.c:2582 [inline] __do_sys_sendmsg net/socket.c:2591 [inline] __se_sys_sendmsg+0x19e/0x270 net/socket.c:2589 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x45/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f67b19a24ad RSP: 002b:00007f67b17febb8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f67b1b45f80 RCX: 00007f67b19a24ad RDX: 0000000000000000 RSI: 0000000020005e40 RDI: 0000000000000004 RBP: 00007f67b1a1e01d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd2513764f R14: 00007ffd251376e0 R15: 00007f67b17fed40 The buggy address belongs to the variable: wwan_rtnl_policy+0x20/0x40 The buggy address belongs to the physical page: page:ffffea00002c2700 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xb09c flags: 0xfff00000001000(reserved|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000001000 ffffea00002c2708 ffffea00002c2708 0000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner info is not present (never set?) Memory state around the buggy address: ffffffff8b09ca00: 05 f9 f9 f9 05 f9 f9 f9 00 01 f9 f9 00 01 f9 f9 ffffffff8b09ca80: 00 00 00 05 f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 >ffffffff8b09cb00: 00 00 00 00 05 f9 f9 f9 00 00 00 00 f9 f9 f9 f9 ^ ffffffff8b09cb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== According to the comment of `nla_parse_nested_deprecated`, use correct size `IFLA_WWAN_MAX` here to fix this issue. Fixes: 88b710532e53 ("wwan: add interface creation support") Signed-off-by: Lin Ma Reviewed-by: Loic Poulain Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241015131621.47503-1-linma@zju.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wwan/wwan_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 284ab1f56391..45ed3afc443d 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -1031,7 +1031,7 @@ static const struct nla_policy wwan_rtnl_policy[IFLA_WWAN_MAX + 1] = { static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = { .kind = "wwan", - .maxtype = __IFLA_WWAN_MAX, + .maxtype = IFLA_WWAN_MAX, .alloc = wwan_rtnl_alloc, .validate = wwan_rtnl_validate, .newlink = wwan_rtnl_newlink, From 10cb940429c20238c7aa0a8f0e2b183c553571dc Mon Sep 17 00:00:00 2001 From: Praveen Kumar Kannoju Date: Wed, 8 May 2024 19:06:17 +0530 Subject: [PATCH 144/207] net/sched: adjust device watchdog timer to detect stopped queue at right time [ Upstream commit 33fb988b67050d9bb512f77f08453fa00088943c ] Applications are sensitive to long network latency, particularly heartbeat monitoring ones. Longer the tx timeout recovery higher the risk with such applications on a production machines. This patch remedies, yet honoring device set tx timeout. Modify watchdog next timeout to be shorter than the device specified. Compute the next timeout be equal to device watchdog timeout less the how long ago queue stop had been done. At next watchdog timeout tx timeout handler is called into if still in stopped state. Either called or not called, restore the watchdog timeout back to device specified. Signed-off-by: Praveen Kumar Kannoju Link: https://lore.kernel.org/r/20240508133617.4424-1-praveen.kannoju@oracle.com Signed-off-by: Jakub Kicinski Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()") Signed-off-by: Sasha Levin --- net/sched/sch_generic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6ab9359c1706..7f0c8df7b63e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -505,19 +505,22 @@ static void dev_watchdog(struct timer_list *t) unsigned int timedout_ms = 0; unsigned int i; unsigned long trans_start; + unsigned long oldest_start = jiffies; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); trans_start = READ_ONCE(txq->trans_start); - if (netif_xmit_stopped(txq) && - time_after(jiffies, (trans_start + - dev->watchdog_timeo))) { + if (!netif_xmit_stopped(txq)) + continue; + if (time_after(jiffies, trans_start + dev->watchdog_timeo)) { timedout_ms = jiffies_to_msecs(jiffies - trans_start); atomic_long_inc(&txq->trans_timeout); break; } + if (time_after(oldest_start, trans_start)) + oldest_start = trans_start; } if (unlikely(timedout_ms)) { @@ -530,7 +533,7 @@ static void dev_watchdog(struct timer_list *t) netif_unfreeze_queues(dev); } if (!mod_timer(&dev->watchdog_timer, - round_jiffies(jiffies + + round_jiffies(oldest_start + dev->watchdog_timeo))) release = false; } From a7bdb199784fce9584144b51246abd2e01ddc206 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Oct 2024 19:41:18 +0000 Subject: [PATCH 145/207] net: fix races in netdev_tx_sent_queue()/dev_watchdog() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 95ecba62e2fd201bcdcca636f5d774f1cd4f1458 ] Some workloads hit the infamous dev_watchdog() message: "NETDEV WATCHDOG: eth0 (xxxx): transmit queue XX timed out" It seems possible to hit this even for perfectly normal BQL enabled drivers: 1) Assume a TX queue was idle for more than dev->watchdog_timeo (5 seconds unless changed by the driver) 2) Assume a big packet is sent, exceeding current BQL limit. 3) Driver ndo_start_xmit() puts the packet in TX ring, and netdev_tx_sent_queue() is called. 4) QUEUE_STATE_STACK_XOFF could be set from netdev_tx_sent_queue() before txq->trans_start has been written. 5) txq->trans_start is written later, from netdev_start_xmit() if (rc == NETDEV_TX_OK) txq_trans_update(txq) dev_watchdog() running on another cpu could read the old txq->trans_start, and then see QUEUE_STATE_STACK_XOFF, because 5) did not happen yet. To solve the issue, write txq->trans_start right before one XOFF bit is set : - _QUEUE_STATE_DRV_XOFF from netif_tx_stop_queue() - __QUEUE_STATE_STACK_XOFF from netdev_tx_sent_queue() From dev_watchdog(), we have to read txq->state before txq->trans_start. Add memory barriers to enforce correct ordering. In the future, we could avoid writing over txq->trans_start for normal operations, and rename this field to txq->xoff_start_time. Fixes: bec251bc8b6a ("net: no longer stop all TX queues in dev_watchdog()") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241015194118.3951657-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/linux/netdevice.h | 12 ++++++++++++ net/sched/sch_generic.c | 8 +++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8f5ac20b4c03..1576e7443eee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3363,6 +3363,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { + /* Paired with READ_ONCE() from dev_watchdog() */ + WRITE_ONCE(dev_queue->trans_start, jiffies); + + /* This barrier is paired with smp_mb() from dev_watchdog() */ + smp_mb__before_atomic(); + /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -3479,6 +3485,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, if (likely(dql_avail(&dev_queue->dql) >= 0)) return; + /* Paired with READ_ONCE() from dev_watchdog() */ + WRITE_ONCE(dev_queue->trans_start, jiffies); + + /* This barrier is paired with smp_mb() from dev_watchdog() */ + smp_mb__before_atomic(); + set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); /* diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7f0c8df7b63e..b51af871a621 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -511,9 +511,15 @@ static void dev_watchdog(struct timer_list *t) struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); - trans_start = READ_ONCE(txq->trans_start); if (!netif_xmit_stopped(txq)) continue; + + /* Paired with WRITE_ONCE() + smp_mb...() in + * netdev_tx_sent_queue() and netif_tx_stop_queue(). + */ + smp_mb(); + trans_start = READ_ONCE(txq->trans_start); + if (time_after(jiffies, trans_start + dev->watchdog_timeo)) { timedout_ms = jiffies_to_msecs(jiffies - trans_start); atomic_long_inc(&txq->trans_timeout); From 7caee37c466a9dee4c51529c50baf09769f12ef0 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Oct 2024 09:18:37 +0200 Subject: [PATCH 146/207] net: usb: usbnet: fix name regression [ Upstream commit 8a7d12d674ac6f2147c18f36d1e15f1a48060edf ] The fix for MAC addresses broke detection of the naming convention because it gave network devices no random MAC before bind() was called. This means that the check for the local assignment bit was always negative as the address was zeroed from allocation, instead of from overwriting the MAC with a unique hardware address. The correct check for whether bind() has altered the MAC is done with is_zero_ether_addr Signed-off-by: Oliver Neukum Reported-by: Greg Thelen Diagnosed-by: John Sperbeck Fixes: bab8eb0dd4cb9 ("usbnet: modern method to get random MAC") Link: https://patch.msgid.link/20241017071849.389636-1-oneukum@suse.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/usbnet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 4f5a3a4aac89..9f66c47dc58b 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1771,7 +1771,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) // can rename the link if it knows better. if ((dev->driver_info->flags & FLAG_ETHER) != 0 && ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || - (net->dev_addr [0] & 0x02) == 0)) + /* somebody touched it*/ + !is_zero_ether_addr(net->dev_addr))) strscpy(net->name, "eth%d", sizeof(net->name)); /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) From d1100acab464e054cbd056aac0e24b790926f18d Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 21 Dec 2023 18:22:24 -0500 Subject: [PATCH 147/207] bpf: Simplify checking size of helper accesses [ Upstream commit 8a021e7fa10576eeb3938328f39bbf98fe7d4715 ] This patch simplifies the verification of size arguments associated to pointer arguments to helpers and kfuncs. Many helpers take a pointer argument followed by the size of the memory access performed to be performed through that pointer. Before this patch, the handling of the size argument in check_mem_size_reg() was confusing and wasteful: if the size register's lower bound was 0, then the verification was done twice: once considering the size of the access to be the lower-bound of the respective argument, and once considering the upper bound (even if the two are the same). The upper bound checking is a super-set of the lower-bound checking(*), except: the only point of the lower-bound check is to handle the case where zero-sized-accesses are explicitly not allowed and the lower-bound is zero. This static condition is now checked explicitly, replacing a much more complex, expensive and confusing verification call to check_helper_mem_access(). Error messages change in this patch. Before, messages about illegal zero-size accesses depended on the type of the pointer and on other conditions, and sometimes the message was plain wrong: in some tests that changed you'll see that the old message was something like "R1 min value is outside of the allowed memory range", where R1 is the pointer register; the error was wrongly claiming that the pointer was bad instead of the size being bad. Other times the information that the size came for a register with a possible range of values was wrong, and the error presented the size as a fixed zero. Now the errors refer to the right register. However, the old error messages did contain useful information about the pointer register which is now lost; recovering this information was deemed not important enough. (*) Besides standing to reason that the checks for a bigger size access are a super-set of the checks for a smaller size access, I have also mechanically verified this by reading the code for all types of pointers. I could convince myself that it's true for all but PTR_TO_BTF_ID (check_ptr_to_btf_access). There, simply looking line-by-line does not immediately prove what we want. If anyone has any qualms, let me know. Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231221232225.568730-2-andreimatei1@gmail.com Stable-dep-of: 8ea607330a39 ("bpf: Fix overloading of MEM_UNINIT's meaning") Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 10 ++++------ .../selftests/bpf/progs/verifier_helper_value_access.c | 8 ++++---- tools/testing/selftests/bpf/progs/verifier_raw_stack.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 28b09ca5525f..f24d570d67ca 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7324,12 +7324,10 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, return -EACCES; } - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; + if (reg->umin_value == 0 && !zero_size_allowed) { + verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n", + regno, reg->umin_value, reg->umax_value); + return -EACCES; } if (reg->umax_value >= BPF_MAX_VAR_SIZ) { diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c index 692216c0ad3d..3e8340c2408f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c @@ -91,7 +91,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to map: empty range") -__failure __msg("invalid access to map value, value_size=48 off=0 size=0") +__failure __msg("R2 invalid zero-sized read") __naked void access_to_map_empty_range(void) { asm volatile (" \ @@ -221,7 +221,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const imm): empty range") -__failure __msg("invalid access to map value, value_size=48 off=4 size=0") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_imm_empty_range(void) { asm volatile (" \ @@ -386,7 +386,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const reg): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_reg_empty_range(void) { asm volatile (" \ @@ -556,7 +556,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via variable): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void map_via_variable_empty_range(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index f67390224a9c..7cc83acac727 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -64,7 +64,7 @@ __naked void load_bytes_negative_len_2(void) SEC("tc") __description("raw_stack: skb_load_bytes, zero len") -__failure __msg("invalid zero-sized read") +__failure __msg("R4 invalid zero-sized read: u64=[0,0]") __naked void skb_load_bytes_zero_len(void) { asm volatile (" \ From 8a33a047bd31a7047b8dffe688c2c10e1ab871eb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 21 Oct 2024 17:28:05 +0200 Subject: [PATCH 148/207] bpf: Add MEM_WRITE attribute [ Upstream commit 6fad274f06f038c29660aa53fbad14241c9fd976 ] Add a MEM_WRITE attribute for BPF helper functions which can be used in bpf_func_proto to annotate an argument type in order to let the verifier know that the helper writes into the memory passed as an argument. In the past MEM_UNINIT has been (ab)used for this function, but the latter merely tells the verifier that the passed memory can be uninitialized. There have been bugs with overloading the latter but aside from that there are also cases where the passed memory is read + written which currently cannot be expressed, see also 4b3786a6c539 ("bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error"). Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241021152809.33343-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Stable-dep-of: 8ea607330a39 ("bpf: Fix overloading of MEM_UNINIT's meaning") Signed-off-by: Sasha Levin --- include/linux/bpf.h | 14 +++++++++++--- kernel/bpf/helpers.c | 10 +++++----- kernel/bpf/ringbuf.c | 2 +- kernel/bpf/syscall.c | 2 +- kernel/trace/bpf_trace.c | 4 ++-- net/core/filter.c | 4 ++-- 6 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1e05cc80e048..5a27fd533fab 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -616,6 +616,7 @@ enum bpf_type_flag { */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + /* MEM can be uninitialized. */ MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), /* DYNPTR points to memory local to the bpf program. */ @@ -681,6 +682,13 @@ enum bpf_type_flag { */ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + /* MEM is being written to, often combined with MEM_UNINIT. Non-presence + * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the + * MEM_UNINIT means that memory needs to be initialized since it is also + * read. + */ + MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -738,10 +746,10 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, - /* pointer to memory does not need to be initialized, helper function must fill - * all bytes or clear them in error case. + /* Pointer to memory does not need to be initialized, since helper function + * fills all bytes or clears them in error case. */ - ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM, /* Pointer to valid memory of size known at compile time. */ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3dba5bb294d8..41d62405c852 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -110,7 +110,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) @@ -123,7 +123,7 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) @@ -538,7 +538,7 @@ const struct bpf_func_proto bpf_strtol_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(s64), }; @@ -568,7 +568,7 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(u64), }; @@ -1607,7 +1607,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 238d9b206bbd..246559c3e93d 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -632,7 +632,7 @@ const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b43302c80cac..8a1cadc1ff9d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5689,7 +5689,7 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(u64), }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bbdc4199748b..ecc86a595b75 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1220,7 +1220,7 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg3_size = sizeof(u64), }; @@ -1237,7 +1237,7 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { .func = get_func_ret, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg2_size = sizeof(u64), }; diff --git a/net/core/filter.c b/net/core/filter.c index bbb143248843..a7d928345b1f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6310,7 +6310,7 @@ static const struct bpf_func_proto bpf_skb_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6322,7 +6322,7 @@ static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, From 48068ccaea957469f1adf78dfd2c1c9a7e18f0fe Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 21 Oct 2024 17:28:06 +0200 Subject: [PATCH 149/207] bpf: Fix overloading of MEM_UNINIT's meaning [ Upstream commit 8ea607330a39184f51737c6ae706db7fdca7628e ] Lonial reported an issue in the BPF verifier where check_mem_size_reg() has the following code: if (!tnum_is_const(reg->var_off)) /* For unprivileged variable accesses, disable raw * mode so that the program is required to * initialize all the memory that the helper could * just partially fill up. */ meta = NULL; This means that writes are not checked when the register containing the size of the passed buffer has not a fixed size. Through this bug, a BPF program can write to a map which is marked as read-only, for example, .rodata global maps. The problem is that MEM_UNINIT's initial meaning that "the passed buffer to the BPF helper does not need to be initialized" which was added back in commit 435faee1aae9 ("bpf, verifier: add ARG_PTR_TO_RAW_STACK type") got overloaded over time with "the passed buffer is being written to". The problem however is that checks such as the above which were added later via 06c1c049721a ("bpf: allow helpers access to variable memory") set meta to NULL in order force the user to always initialize the passed buffer to the helper. Due to the current double meaning of MEM_UNINIT, this bypasses verifier write checks to the memory (not boundary checks though) and only assumes the latter memory is read instead. Fix this by reverting MEM_UNINIT back to its original meaning, and having MEM_WRITE as an annotation to BPF helpers in order to then trigger the BPF verifier checks for writing to memory. Some notes: check_arg_pair_ok() ensures that for ARG_CONST_SIZE{,_OR_ZERO} we can access fn->arg_type[arg - 1] since it must contain a preceding ARG_PTR_TO_MEM. For check_mem_reg() the meta argument can be removed altogether since we do check both BPF_READ and BPF_WRITE. Same for the equivalent check_kfunc_mem_size_reg(). Fixes: 7b3552d3f9f6 ("bpf: Reject writes for PTR_TO_MAP_KEY in check_helper_mem_access") Fixes: 97e6d7dab1ca ("bpf: Check PTR_TO_MEM | MEM_RDONLY in check_helper_mem_access") Fixes: 15baa55ff5b0 ("bpf/verifier: allow all functions to read user provided context") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241021152809.33343-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 73 +++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f24d570d67ca..03b5797b8fca 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7198,7 +7198,8 @@ mark: } static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, - int access_size, bool zero_size_allowed, + int access_size, enum bpf_access_type access_type, + bool zero_size_allowed, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; @@ -7210,7 +7211,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_KEY: - if (meta && meta->raw_mode) { + if (access_type == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type)); return -EACCES; @@ -7218,15 +7219,13 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_mem_region_access(env, regno, reg->off, access_size, reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: - if (check_map_access_type(env, regno, reg->off, access_size, - meta && meta->raw_mode ? BPF_WRITE : - BPF_READ)) + if (check_map_access_type(env, regno, reg->off, access_size, access_type)) return -EACCES; return check_map_access(env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER); case PTR_TO_MEM: if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) { + if (access_type == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type)); return -EACCES; @@ -7237,7 +7236,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, zero_size_allowed); case PTR_TO_BUF: if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) { + if (access_type == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type)); return -EACCES; @@ -7265,7 +7264,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, * Dynamically check it now. */ if (!env->ops->convert_ctx_access) { - enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ; int offset = access_size - 1; /* Allow zero-byte read from PTR_TO_CTX */ @@ -7273,7 +7271,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return zero_size_allowed ? 0 : -EACCES; return check_mem_access(env, env->insn_idx, regno, offset, BPF_B, - atype, -1, false, false); + access_type, -1, false, false); } fallthrough; @@ -7292,6 +7290,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, static int check_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, + enum bpf_access_type access_type, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { @@ -7307,15 +7306,12 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, */ meta->msize_max_value = reg->umax_value; - /* The register is SCALAR_VALUE; the access check - * happens using its boundaries. + /* The register is SCALAR_VALUE; the access check happens using + * its boundaries. For unprivileged variable accesses, disable + * raw mode so that the program is required to initialize all + * the memory that the helper could just partially fill up. */ if (!tnum_is_const(reg->var_off)) - /* For unprivileged variable accesses, disable raw - * mode so that the program is required to - * initialize all the memory that the helper could - * just partially fill up. - */ meta = NULL; if (reg->smin_value < 0) { @@ -7335,9 +7331,8 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, regno); return -EACCES; } - err = check_helper_mem_access(env, regno - 1, - reg->umax_value, - zero_size_allowed, meta); + err = check_helper_mem_access(env, regno - 1, reg->umax_value, + access_type, zero_size_allowed, meta); if (!err) err = mark_chain_precision(env, regno); return err; @@ -7348,13 +7343,11 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, { bool may_be_null = type_may_be_null(reg->type); struct bpf_reg_state saved_reg; - struct bpf_call_arg_meta meta; int err; if (register_is_null(reg)) return 0; - memset(&meta, 0, sizeof(meta)); /* Assuming that the register contains a value check if the memory * access is safe. Temporarily save and restore the register's state as * the conversion shouldn't be visible to a caller. @@ -7364,10 +7357,8 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, mark_ptr_not_null_reg(reg); } - err = check_helper_mem_access(env, regno, mem_size, true, &meta); - /* Check access for BPF_WRITE */ - meta.raw_mode = true; - err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta); + err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL); + err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL); if (may_be_null) *reg = saved_reg; @@ -7393,13 +7384,12 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg mark_ptr_not_null_reg(mem_reg); } - err = check_mem_size_reg(env, reg, regno, true, &meta); - /* Check access for BPF_WRITE */ - meta.raw_mode = true; - err = err ?: check_mem_size_reg(env, reg, regno, true, &meta); + err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta); + err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta); if (may_be_null) *mem_reg = saved_reg; + return err; } @@ -8585,9 +8575,8 @@ skip_type_check: verbose(env, "invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_helper_mem_access(env, regno, - meta->map_ptr->key_size, false, - NULL); + err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, + BPF_READ, false, NULL); break; case ARG_PTR_TO_MAP_VALUE: if (type_may_be_null(arg_type) && register_is_null(reg)) @@ -8602,9 +8591,9 @@ skip_type_check: return -EACCES; } meta->raw_mode = arg_type & MEM_UNINIT; - err = check_helper_mem_access(env, regno, - meta->map_ptr->value_size, false, - meta); + err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, + arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, + false, meta); break; case ARG_PTR_TO_PERCPU_BTF_ID: if (!reg->btf_id) { @@ -8646,7 +8635,9 @@ skip_type_check: */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta); + err = check_helper_mem_access(env, regno, fn->arg_size[arg], + arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, + false, meta); if (err) return err; if (arg_type & MEM_ALIGNED) @@ -8654,10 +8645,16 @@ skip_type_check: } break; case ARG_CONST_SIZE: - err = check_mem_size_reg(env, reg, regno, false, meta); + err = check_mem_size_reg(env, reg, regno, + fn->arg_type[arg - 1] & MEM_WRITE ? + BPF_WRITE : BPF_READ, + false, meta); break; case ARG_CONST_SIZE_OR_ZERO: - err = check_mem_size_reg(env, reg, regno, true, meta); + err = check_mem_size_reg(env, reg, regno, + fn->arg_type[arg - 1] & MEM_WRITE ? + BPF_WRITE : BPF_READ, + true, meta); break; case ARG_PTR_TO_DYNPTR: err = process_dynptr_func(env, regno, insn_idx, arg_type, 0); From d123062a3fdc57c931c57fb4606360b47d501ed0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 21 Oct 2024 17:28:07 +0200 Subject: [PATCH 150/207] bpf: Remove MEM_UNINIT from skb/xdp MTU helpers [ Upstream commit 14a3d3ef02ba53447d5112a2641aac0d10dc994f ] We can now undo parts of 4b3786a6c539 ("bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error") as discussed in [0]. Given the BPF helpers now have MEM_WRITE tag, the MEM_UNINIT can be cleared. The mtu_len is an input as well as output argument, meaning, the BPF program has to set it to something. It cannot be uninitialized. Therefore, allowing uninitialized memory and zeroing it on error would be odd. It was done as an interim step in 4b3786a6c539 as the desired behavior could not have been expressed before the introduction of MEM_WRITE tag. Fixes: 4b3786a6c539 ("bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/a86eb76d-f52f-dee4-e5d2-87e45de3e16f@iogearbox.net [0] Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241021152809.33343-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- net/core/filter.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index a7d928345b1f..a2467a7c01f9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6223,24 +6223,16 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, { int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; struct net_device *dev = skb->dev; - int skb_len, dev_len; - int mtu = 0; + int mtu, dev_len, skb_len; - if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) { - ret = -EINVAL; - goto out; - } - - if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) { - ret = -EINVAL; - goto out; - } + if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) + return -EINVAL; + if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) + return -EINVAL; dev = __dev_via_ifindex(dev, ifindex); - if (unlikely(!dev)) { - ret = -ENODEV; - goto out; - } + if (unlikely(!dev)) + return -ENODEV; mtu = READ_ONCE(dev->mtu); dev_len = mtu + dev->hard_header_len; @@ -6275,19 +6267,15 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, struct net_device *dev = xdp->rxq->dev; int xdp_len = xdp->data_end - xdp->data; int ret = BPF_MTU_CHK_RET_SUCCESS; - int mtu = 0, dev_len; + int mtu, dev_len; /* XDP variant doesn't support multi-buffer segment check (yet) */ - if (unlikely(flags)) { - ret = -EINVAL; - goto out; - } + if (unlikely(flags)) + return -EINVAL; dev = __dev_via_ifindex(dev, ifindex); - if (unlikely(!dev)) { - ret = -ENODEV; - goto out; - } + if (unlikely(!dev)) + return -ENODEV; mtu = READ_ONCE(dev->mtu); dev_len = mtu + dev->hard_header_len; @@ -6299,7 +6287,7 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, xdp_len += len_diff; /* minus result pass check */ if (xdp_len > dev_len) ret = BPF_MTU_CHK_RET_FRAG_NEEDED; -out: + *mtu_len = mtu; return ret; } @@ -6310,7 +6298,7 @@ static const struct bpf_func_proto bpf_skb_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_WRITE | MEM_ALIGNED, .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, @@ -6322,7 +6310,7 @@ static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_WRITE | MEM_ALIGNED, .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, From a5cf8670acbb66f6cb13211d2f9b1d1c368a90fd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 17 Oct 2024 19:10:48 +0300 Subject: [PATCH 151/207] net/sched: act_api: deny mismatched skip_sw/skip_hw flags for actions created by classifiers [ Upstream commit 34d35b4edbbe890a91bec939bfd29ad92517a52b ] tcf_action_init() has logic for checking mismatches between action and filter offload flags (skip_sw/skip_hw). AFAIU, this is intended to run on the transition between the new tc_act_bind(flags) returning true (aka now gets bound to classifier) and tc_act_bind(act->tcfa_flags) returning false (aka action was not bound to classifier before). Otherwise, the check is skipped. For the case where an action is not standalone, but rather it was created by a classifier and is bound to it, tcf_action_init() skips the check entirely, and this means it allows mismatched flags to occur. Taking the matchall classifier code path as an example (with mirred as an action), the reason is the following: 1 | mall_change() 2 | -> mall_replace_hw_filter() 3 | -> tcf_exts_validate_ex() 4 | -> flags |= TCA_ACT_FLAGS_BIND; 5 | -> tcf_action_init() 6 | -> tcf_action_init_1() 7 | -> a_o->init() 8 | -> tcf_mirred_init() 9 | -> tcf_idr_create_from_flags() 10 | -> tcf_idr_create() 11 | -> p->tcfa_flags = flags; 12 | -> tc_act_bind(flags)) 13 | -> tc_act_bind(act->tcfa_flags) When invoked from tcf_exts_validate_ex() like matchall does (but other classifiers validate their extensions as well), tcf_action_init() runs in a call path where "flags" always contains TCA_ACT_FLAGS_BIND (set by line 4). So line 12 is always true, and line 13 is always true as well. No transition ever takes place, and the check is skipped. The code was added in this form in commit c86e0209dc77 ("flow_offload: validate flags of filter and actions"), but I'm attributing the blame even earlier in that series, to when TCA_ACT_FLAGS_SKIP_HW and TCA_ACT_FLAGS_SKIP_SW were added to the UAPI. Following the development process of this change, the check did not always exist in this form. A change took place between v3 [1] and v4 [2], AFAIU due to review feedback that it doesn't make sense for action flags to be different than classifier flags. I think I agree with that feedback, but it was translated into code that omits enforcing this for "classic" actions created at the same time with the filters themselves. There are 3 more important cases to discuss. First there is this command: $ tc qdisc add dev eth0 clasct $ tc filter add dev eth0 ingress matchall skip_sw \ action mirred ingress mirror dev eth1 which should be allowed, because prior to the concept of dedicated action flags, it used to work and it used to mean the action inherited the skip_sw/skip_hw flags from the classifier. It's not a mismatch. Then we have this command: $ tc qdisc add dev eth0 clasct $ tc filter add dev eth0 ingress matchall skip_sw \ action mirred ingress mirror dev eth1 skip_hw where there is a mismatch and it should be rejected. Finally, we have: $ tc qdisc add dev eth0 clasct $ tc filter add dev eth0 ingress matchall skip_sw \ action mirred ingress mirror dev eth1 skip_sw where the offload flags coincide, and this should be treated the same as the first command based on inheritance, and accepted. [1]: https://lore.kernel.org/netdev/20211028110646.13791-9-simon.horman@corigine.com/ [2]: https://lore.kernel.org/netdev/20211118130805.23897-10-simon.horman@corigine.com/ Fixes: 7adc57651211 ("flow_offload: add skip_hw and skip_sw to control if offload the action") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20241017161049.3570037-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/act_api.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2d6d58e1b278..4572aa6e0273 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1489,8 +1489,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, bool skip_sw = tc_skip_sw(fl_flags); bool skip_hw = tc_skip_hw(fl_flags); - if (tc_act_bind(act->tcfa_flags)) + if (tc_act_bind(act->tcfa_flags)) { + /* Action is created by classifier and is not + * standalone. Check that the user did not set + * any action flags different than the + * classifier flags, and inherit the flags from + * the classifier for the compatibility case + * where no flags were specified at all. + */ + if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) || + (tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) { + NL_SET_ERR_MSG(extack, + "Mismatch between action and filter offload flags"); + err = -EINVAL; + goto err; + } + if (skip_sw) + act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW; + if (skip_hw) + act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW; continue; + } + + /* Action is standalone */ if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || skip_hw != tc_act_skip_hw(act->tcfa_flags)) { NL_SET_ERR_MSG(extack, From 0d4c0d2844e4eac3aed647f948fd7e60eea56a61 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 18 Oct 2024 08:13:38 +0300 Subject: [PATCH 152/207] net: sched: fix use-after-free in taprio_change() [ Upstream commit f504465970aebb2467da548f7c1efbbf36d0f44b ] In 'taprio_change()', 'admin' pointer may become dangling due to sched switch / removal caused by 'advance_sched()', and critical section protected by 'q->current_entry_lock' is too small to prevent from such a scenario (which causes use-after-free detected by KASAN). Fix this by prefer 'rcu_replace_pointer()' over 'rcu_assign_pointer()' to update 'admin' immediately before an attempt to schedule freeing. Fixes: a3d43c0d56f1 ("taprio: Add support adding an admin schedule") Reported-by: syzbot+b65e0af58423fc8a73aa@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b65e0af58423fc8a73aa Acked-by: Vinicius Costa Gomes Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20241018051339.418890-1-dmantipov@yandex.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 87090d679036..bc8e55c6d63d 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1988,7 +1988,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, taprio_start_sched(sch, start, new_admin); - rcu_assign_pointer(q->admin_sched, new_admin); + admin = rcu_replace_pointer(q->admin_sched, new_admin, + lockdep_rtnl_is_held()); if (admin) call_rcu(&admin->rcu, taprio_free_sched_cb); From 5d282467245f267c0b9ada3f7f309ff838521536 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 18 Oct 2024 08:13:39 +0300 Subject: [PATCH 153/207] net: sched: use RCU read-side critical section in taprio_dump() [ Upstream commit b22db8b8befe90b61c98626ca1a2fbb0505e9fe3 ] Fix possible use-after-free in 'taprio_dump()' by adding RCU read-side critical section there. Never seen on x86 but found on a KASAN-enabled arm64 system when investigating https://syzkaller.appspot.com/bug?extid=b65e0af58423fc8a73aa: [T15862] BUG: KASAN: slab-use-after-free in taprio_dump+0xa0c/0xbb0 [T15862] Read of size 4 at addr ffff0000d4bb88f8 by task repro/15862 [T15862] [T15862] CPU: 0 UID: 0 PID: 15862 Comm: repro Not tainted 6.11.0-rc1-00293-gdefaf1a2113a-dirty #2 [T15862] Hardware name: QEMU QEMU Virtual Machine, BIOS edk2-20240524-5.fc40 05/24/2024 [T15862] Call trace: [T15862] dump_backtrace+0x20c/0x220 [T15862] show_stack+0x2c/0x40 [T15862] dump_stack_lvl+0xf8/0x174 [T15862] print_report+0x170/0x4d8 [T15862] kasan_report+0xb8/0x1d4 [T15862] __asan_report_load4_noabort+0x20/0x2c [T15862] taprio_dump+0xa0c/0xbb0 [T15862] tc_fill_qdisc+0x540/0x1020 [T15862] qdisc_notify.isra.0+0x330/0x3a0 [T15862] tc_modify_qdisc+0x7b8/0x1838 [T15862] rtnetlink_rcv_msg+0x3c8/0xc20 [T15862] netlink_rcv_skb+0x1f8/0x3d4 [T15862] rtnetlink_rcv+0x28/0x40 [T15862] netlink_unicast+0x51c/0x790 [T15862] netlink_sendmsg+0x79c/0xc20 [T15862] __sock_sendmsg+0xe0/0x1a0 [T15862] ____sys_sendmsg+0x6c0/0x840 [T15862] ___sys_sendmsg+0x1ac/0x1f0 [T15862] __sys_sendmsg+0x110/0x1d0 [T15862] __arm64_sys_sendmsg+0x74/0xb0 [T15862] invoke_syscall+0x88/0x2e0 [T15862] el0_svc_common.constprop.0+0xe4/0x2a0 [T15862] do_el0_svc+0x44/0x60 [T15862] el0_svc+0x50/0x184 [T15862] el0t_64_sync_handler+0x120/0x12c [T15862] el0t_64_sync+0x190/0x194 [T15862] [T15862] Allocated by task 15857: [T15862] kasan_save_stack+0x3c/0x70 [T15862] kasan_save_track+0x20/0x3c [T15862] kasan_save_alloc_info+0x40/0x60 [T15862] __kasan_kmalloc+0xd4/0xe0 [T15862] __kmalloc_cache_noprof+0x194/0x334 [T15862] taprio_change+0x45c/0x2fe0 [T15862] tc_modify_qdisc+0x6a8/0x1838 [T15862] rtnetlink_rcv_msg+0x3c8/0xc20 [T15862] netlink_rcv_skb+0x1f8/0x3d4 [T15862] rtnetlink_rcv+0x28/0x40 [T15862] netlink_unicast+0x51c/0x790 [T15862] netlink_sendmsg+0x79c/0xc20 [T15862] __sock_sendmsg+0xe0/0x1a0 [T15862] ____sys_sendmsg+0x6c0/0x840 [T15862] ___sys_sendmsg+0x1ac/0x1f0 [T15862] __sys_sendmsg+0x110/0x1d0 [T15862] __arm64_sys_sendmsg+0x74/0xb0 [T15862] invoke_syscall+0x88/0x2e0 [T15862] el0_svc_common.constprop.0+0xe4/0x2a0 [T15862] do_el0_svc+0x44/0x60 [T15862] el0_svc+0x50/0x184 [T15862] el0t_64_sync_handler+0x120/0x12c [T15862] el0t_64_sync+0x190/0x194 [T15862] [T15862] Freed by task 6192: [T15862] kasan_save_stack+0x3c/0x70 [T15862] kasan_save_track+0x20/0x3c [T15862] kasan_save_free_info+0x4c/0x80 [T15862] poison_slab_object+0x110/0x160 [T15862] __kasan_slab_free+0x3c/0x74 [T15862] kfree+0x134/0x3c0 [T15862] taprio_free_sched_cb+0x18c/0x220 [T15862] rcu_core+0x920/0x1b7c [T15862] rcu_core_si+0x10/0x1c [T15862] handle_softirqs+0x2e8/0xd64 [T15862] __do_softirq+0x14/0x20 Fixes: 18cdd2f0998a ("net/sched: taprio: taprio_dump and taprio_change are protected by rtnl_mutex") Acked-by: Vinicius Costa Gomes Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20241018051339.418890-2-dmantipov@yandex.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/sch_taprio.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index bc8e55c6d63d..951a87909c29 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2397,9 +2397,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_mqprio_qopt opt = { 0 }; struct nlattr *nest, *sched_nest; - oper = rtnl_dereference(q->oper_sched); - admin = rtnl_dereference(q->admin_sched); - mqprio_qopt_reconstruct(dev, &opt); nest = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -2420,18 +2417,23 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; + rcu_read_lock(); + + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); + if (oper && taprio_dump_tc_entries(skb, q, oper)) - goto options_error; + goto options_error_rcu; if (oper && dump_schedule(skb, oper)) - goto options_error; + goto options_error_rcu; if (!admin) goto done; sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); if (!sched_nest) - goto options_error; + goto options_error_rcu; if (dump_schedule(skb, admin)) goto admin_error; @@ -2439,11 +2441,15 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_nest_end(skb, sched_nest); done: + rcu_read_unlock(); return nla_nest_end(skb, nest); admin_error: nla_nest_cancel(skb, sched_nest); +options_error_rcu: + rcu_read_unlock(); + options_error: nla_nest_cancel(skb, nest); From 7d6d46b429804b1a182106e27e2f8c0e84689e1a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 18 Oct 2024 11:08:16 +0200 Subject: [PATCH 154/207] r8169: avoid unsolicited interrupts [ Upstream commit 10ce0db787004875f4dba068ea952207d1d8abeb ] It was reported that after resume from suspend a PCI error is logged and connectivity is broken. Error message is: PCI error (cmd = 0x0407, status_errs = 0x0000) The message seems to be a red herring as none of the error bits is set, and the PCI command register value also is normal. Exception handling for a PCI error includes a chip reset what apparently brakes connectivity here. The interrupt status bit triggering the PCI error handling isn't actually used on PCIe chip versions, so it's not clear why this bit is set by the chip. Fix this by ignoring this bit on PCIe chip versions. Fixes: 0e4851502f84 ("r8169: merge with version 8.001.00 of Realtek's r8168 driver") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219388 Tested-by: Atlas Yu Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/78e2f535-438f-4212-ad94-a77637ac6c9c@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index b499d8ea6d21..6856eb602f82 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4576,7 +4576,9 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask)) return IRQ_NONE; - if (unlikely(status & SYSErr)) { + /* At least RTL8168fp may unexpectedly set the SYSErr bit */ + if (unlikely(status & SYSErr && + tp->mac_version <= RTL_GIGA_MAC_VER_06)) { rtl8169_pcierr_interrupt(tp->dev); goto out; } From 1ba33b327c3f88a7baee598979d73ab5b44d41cc Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 18 Oct 2024 18:07:48 +0800 Subject: [PATCH 155/207] posix-clock: posix-clock: Fix unbalanced locking in pc_clock_settime() [ Upstream commit 6e62807c7fbb3c758d233018caf94dfea9c65dbd ] If get_clock_desc() succeeds, it calls fget() for the clockid's fd, and get the clk->rwsem read lock, so the error path should release the lock to make the lock balance and fput the clockid's fd to make the refcount balance and release the fd related resource. However the below commit left the error path locked behind resulting in unbalanced locking. Check timespec64_valid_strict() before get_clock_desc() to fix it, because the "ts" is not changed after that. Fixes: d8794ac20a29 ("posix-clock: Fix missing timespec64 check in pc_clock_settime()") Acked-by: Richard Cochran Signed-off-by: Jinjie Ruan Acked-by: Anna-Maria Behnsen [pabeni@redhat.com: fixed commit message typo] Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- kernel/time/posix-clock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 8127673bfc45..05e73d209aa8 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -290,6 +290,9 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) struct posix_clock_desc cd; int err; + if (!timespec64_valid_strict(ts)) + return -EINVAL; + err = get_clock_desc(id, &cd); if (err) return err; @@ -299,9 +302,6 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) goto out; } - if (!timespec64_valid_strict(ts)) - return -EINVAL; - if (cd.clk->ops.clock_settime) err = cd.clk->ops.clock_settime(cd.clk, ts); else From d30803f6a972b5b9e26d1d43b583c7ec151de04b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 22 Oct 2024 12:31:08 -0400 Subject: [PATCH 156/207] Bluetooth: SCO: Fix UAF on sco_sock_timeout [ Upstream commit 1bf4470a3939c678fb822073e9ea77a0560bc6bb ] conn->sk maybe have been unlinked/freed while waiting for sco_conn_lock so this checks if the conn->sk is still valid by checking if it part of sco_sk_list. Reported-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com Tested-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4c0d0c4cde787116d465 Fixes: ba316be1b6a0 ("Bluetooth: schedule SCO timeouts with delayed_work") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/af_bluetooth.c | 22 ++++++++++++++++++++++ net/bluetooth/sco.c | 18 ++++++++++++------ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e4a6831133f8..4763a47bf8c8 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -403,6 +403,7 @@ int bt_sock_register(int proto, const struct net_proto_family *ops); void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); +bool bt_sock_linked(struct bt_sock_list *l, struct sock *s); struct sock *bt_sock_alloc(struct net *net, struct socket *sock, struct proto *prot, int proto, gfp_t prio, int kern); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index e39fba5565c5..0b4d0a8bd361 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -185,6 +185,28 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) } EXPORT_SYMBOL(bt_sock_unlink); +bool bt_sock_linked(struct bt_sock_list *l, struct sock *s) +{ + struct sock *sk; + + if (!l || !s) + return false; + + read_lock(&l->lock); + + sk_for_each(sk, &l->head) { + if (s == sk) { + read_unlock(&l->lock); + return true; + } + } + + read_unlock(&l->lock); + + return false; +} +EXPORT_SYMBOL(bt_sock_linked); + void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3c3650902c83..fb368540139a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -76,6 +76,16 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) +static struct sock *sco_sock_hold(struct sco_conn *conn) +{ + if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk)) + return NULL; + + sock_hold(conn->sk); + + return conn->sk; +} + static void sco_sock_timeout(struct work_struct *work) { struct sco_conn *conn = container_of(work, struct sco_conn, @@ -87,9 +97,7 @@ static void sco_sock_timeout(struct work_struct *work) sco_conn_unlock(conn); return; } - sk = conn->sk; - if (sk) - sock_hold(sk); + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -194,9 +202,7 @@ static void sco_conn_del(struct hci_conn *hcon, int err) /* Kill socket */ sco_conn_lock(conn); - sk = conn->sk; - if (sk) - sock_hold(sk); + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (sk) { From 14bcb721d241e62fdd18f6f434a2ed2ab6e71a9b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 22 Oct 2024 15:35:49 -0400 Subject: [PATCH 157/207] Bluetooth: ISO: Fix UAF on iso_sock_timeout [ Upstream commit 246b435ad668596aa0e2bbb9d491b6413861211a ] conn->sk maybe have been unlinked/freed while waiting for iso_conn_lock so this checks if the conn->sk is still valid by checking if it part of iso_sk_list. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/iso.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 9b365fb44fac..c2c80d600083 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -90,6 +90,16 @@ static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, #define ISO_CONN_TIMEOUT (HZ * 40) #define ISO_DISCONN_TIMEOUT (HZ * 2) +static struct sock *iso_sock_hold(struct iso_conn *conn) +{ + if (!conn || !bt_sock_linked(&iso_sk_list, conn->sk)) + return NULL; + + sock_hold(conn->sk); + + return conn->sk; +} + static void iso_sock_timeout(struct work_struct *work) { struct iso_conn *conn = container_of(work, struct iso_conn, @@ -97,9 +107,7 @@ static void iso_sock_timeout(struct work_struct *work) struct sock *sk; iso_conn_lock(conn); - sk = conn->sk; - if (sk) - sock_hold(sk); + sk = iso_sock_hold(conn); iso_conn_unlock(conn); if (!sk) @@ -217,9 +225,7 @@ static void iso_conn_del(struct hci_conn *hcon, int err) /* Kill socket */ iso_conn_lock(conn); - sk = conn->sk; - if (sk) - sock_hold(sk); + sk = iso_sock_hold(conn); iso_conn_unlock(conn); if (sk) { From 21db2f35fa97e4a3447f2edeb7b2569a8bfdc83b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 23 Oct 2024 22:03:52 +0200 Subject: [PATCH 158/207] bpf,perf: Fix perf_event_detach_bpf_prog error handling [ Upstream commit 0ee288e69d033850bc87abe0f9cc3ada24763d7f ] Peter reported that perf_event_detach_bpf_prog might skip to release the bpf program for -ENOENT error from bpf_prog_array_copy. This can't happen because bpf program is stored in perf event and is detached and released only when perf event is freed. Let's drop the -ENOENT check and make sure the bpf program is released in any case. Fixes: 170a7e3ea070 ("bpf: bpf_prog_array_copy() should return -ENOENT if exclude_prog not found") Reported-by: Peter Zijlstra Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241023200352.3488610-1-jolsa@kernel.org Closes: https://lore.kernel.org/lkml/20241022111638.GC16066@noisy.programming.kicks-ass.net/ Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ecc86a595b75..9064f75de7e4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2217,8 +2217,6 @@ void perf_event_detach_bpf_prog(struct perf_event *event) old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); - if (ret == -ENOENT) - goto unlock; if (ret < 0) { bpf_prog_array_delete_safe(old_array, event->prog); } else { From 4dc655d86b54410cbf8ecc6a10521f856296e552 Mon Sep 17 00:00:00 2001 From: Shenghao Yang Date: Sun, 20 Oct 2024 14:38:28 +0800 Subject: [PATCH 159/207] net: dsa: mv88e6xxx: group cycle counter coefficients [ Upstream commit 67af86afff74c914944374a103c04e4d9868dd15 ] Instead of having them as individual fields in ptp_ops, wrap the coefficients in a separate struct so they can be referenced together. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Shenghao Yang Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.h | 6 ++-- drivers/net/dsa/mv88e6xxx/ptp.c | 59 ++++++++++++++++---------------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index f48a3c0ac7f9..bfc3a1040ccc 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -206,6 +206,7 @@ struct mv88e6xxx_gpio_ops; struct mv88e6xxx_avb_ops; struct mv88e6xxx_ptp_ops; struct mv88e6xxx_pcs_ops; +struct mv88e6xxx_cc_coeffs; struct mv88e6xxx_irq { u16 masked; @@ -719,10 +720,7 @@ struct mv88e6xxx_ptp_ops { int arr1_sts_reg; int dep_sts_reg; u32 rx_filters; - u32 cc_shift; - u32 cc_mult; - u32 cc_mult_num; - u32 cc_mult_dem; + const struct mv88e6xxx_cc_coeffs *cc_coeffs; }; struct mv88e6xxx_pcs_ops { diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index ea17231dc34e..4d5498dac153 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -18,6 +18,13 @@ #define MV88E6XXX_MAX_ADJ_PPB 1000000 +struct mv88e6xxx_cc_coeffs { + u32 cc_shift; + u32 cc_mult; + u32 cc_mult_num; + u32 cc_mult_dem; +}; + /* Family MV88E6250: * Raw timestamps are in units of 10-ns clock periods. * @@ -25,10 +32,13 @@ * simplifies to * clkadj = scaled_ppm * 2^7 / 5^5 */ -#define MV88E6250_CC_SHIFT 28 -#define MV88E6250_CC_MULT (10 << MV88E6250_CC_SHIFT) -#define MV88E6250_CC_MULT_NUM (1 << 7) -#define MV88E6250_CC_MULT_DEM 3125ULL +#define MV88E6250_CC_SHIFT 28 +static const struct mv88e6xxx_cc_coeffs mv88e6250_cc_coeffs = { + .cc_shift = MV88E6250_CC_SHIFT, + .cc_mult = 10 << MV88E6250_CC_SHIFT, + .cc_mult_num = 1 << 7, + .cc_mult_dem = 3125ULL, +}; /* Other families: * Raw timestamps are in units of 8-ns clock periods. @@ -37,10 +47,13 @@ * simplifies to * clkadj = scaled_ppm * 2^9 / 5^6 */ -#define MV88E6XXX_CC_SHIFT 28 -#define MV88E6XXX_CC_MULT (8 << MV88E6XXX_CC_SHIFT) -#define MV88E6XXX_CC_MULT_NUM (1 << 9) -#define MV88E6XXX_CC_MULT_DEM 15625ULL +#define MV88E6XXX_CC_SHIFT 28 +static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_coeffs = { + .cc_shift = MV88E6XXX_CC_SHIFT, + .cc_mult = 8 << MV88E6XXX_CC_SHIFT, + .cc_mult_num = 1 << 9, + .cc_mult_dem = 15625ULL +}; #define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100) @@ -210,10 +223,10 @@ static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) scaled_ppm = -scaled_ppm; } - mult = ptp_ops->cc_mult; - adj = ptp_ops->cc_mult_num; + mult = ptp_ops->cc_coeffs->cc_mult; + adj = ptp_ops->cc_coeffs->cc_mult_num; adj *= scaled_ppm; - diff = div_u64(adj, ptp_ops->cc_mult_dem); + diff = div_u64(adj, ptp_ops->cc_coeffs->cc_mult_dem); mv88e6xxx_reg_lock(chip); @@ -360,10 +373,7 @@ const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_shift = MV88E6XXX_CC_SHIFT, - .cc_mult = MV88E6XXX_CC_MULT, - .cc_mult_num = MV88E6XXX_CC_MULT_NUM, - .cc_mult_dem = MV88E6XXX_CC_MULT_DEM, + .cc_coeffs = &mv88e6xxx_cc_coeffs }; const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops = { @@ -387,10 +397,7 @@ const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_shift = MV88E6250_CC_SHIFT, - .cc_mult = MV88E6250_CC_MULT, - .cc_mult_num = MV88E6250_CC_MULT_NUM, - .cc_mult_dem = MV88E6250_CC_MULT_DEM, + .cc_coeffs = &mv88e6250_cc_coeffs, }; const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = { @@ -414,10 +421,7 @@ const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_shift = MV88E6XXX_CC_SHIFT, - .cc_mult = MV88E6XXX_CC_MULT, - .cc_mult_num = MV88E6XXX_CC_MULT_NUM, - .cc_mult_dem = MV88E6XXX_CC_MULT_DEM, + .cc_coeffs = &mv88e6xxx_cc_coeffs, }; const struct mv88e6xxx_ptp_ops mv88e6390_ptp_ops = { @@ -442,10 +446,7 @@ const struct mv88e6xxx_ptp_ops mv88e6390_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_shift = MV88E6XXX_CC_SHIFT, - .cc_mult = MV88E6XXX_CC_MULT, - .cc_mult_num = MV88E6XXX_CC_MULT_NUM, - .cc_mult_dem = MV88E6XXX_CC_MULT_DEM, + .cc_coeffs = &mv88e6xxx_cc_coeffs, }; static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc) @@ -483,8 +484,8 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip) memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc)); chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read; chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32); - chip->tstamp_cc.mult = ptp_ops->cc_mult; - chip->tstamp_cc.shift = ptp_ops->cc_shift; + chip->tstamp_cc.mult = ptp_ops->cc_coeffs->cc_mult; + chip->tstamp_cc.shift = ptp_ops->cc_coeffs->cc_shift; timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ktime_to_ns(ktime_get_real())); From 06b1c8091542f35925b4fd0a5c1c15712e11f1f7 Mon Sep 17 00:00:00 2001 From: Shenghao Yang Date: Sun, 20 Oct 2024 14:38:29 +0800 Subject: [PATCH 160/207] net: dsa: mv88e6xxx: read cycle counter period from hardware [ Upstream commit 7e3c18097a709e9b958e721066e5fe76e563739b ] Instead of relying on a fixed mapping of hardware family to cycle counter frequency, pull this information from the MV88E6XXX_TAI_CLOCK_PERIOD register. This lets us support switches whose cycle counter frequencies depend on board design. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Suggested-by: Andrew Lunn Signed-off-by: Shenghao Yang Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.h | 2 +- drivers/net/dsa/mv88e6xxx/ptp.c | 60 ++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index bfc3a1040ccc..f02518e93b60 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -398,6 +398,7 @@ struct mv88e6xxx_chip { struct cyclecounter tstamp_cc; struct timecounter tstamp_tc; struct delayed_work overflow_work; + const struct mv88e6xxx_cc_coeffs *cc_coeffs; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; @@ -720,7 +721,6 @@ struct mv88e6xxx_ptp_ops { int arr1_sts_reg; int dep_sts_reg; u32 rx_filters; - const struct mv88e6xxx_cc_coeffs *cc_coeffs; }; struct mv88e6xxx_pcs_ops { diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index 4d5498dac153..a62b4ce7ff61 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -32,10 +32,10 @@ struct mv88e6xxx_cc_coeffs { * simplifies to * clkadj = scaled_ppm * 2^7 / 5^5 */ -#define MV88E6250_CC_SHIFT 28 -static const struct mv88e6xxx_cc_coeffs mv88e6250_cc_coeffs = { - .cc_shift = MV88E6250_CC_SHIFT, - .cc_mult = 10 << MV88E6250_CC_SHIFT, +#define MV88E6XXX_CC_10NS_SHIFT 28 +static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_10ns_coeffs = { + .cc_shift = MV88E6XXX_CC_10NS_SHIFT, + .cc_mult = 10 << MV88E6XXX_CC_10NS_SHIFT, .cc_mult_num = 1 << 7, .cc_mult_dem = 3125ULL, }; @@ -47,10 +47,10 @@ static const struct mv88e6xxx_cc_coeffs mv88e6250_cc_coeffs = { * simplifies to * clkadj = scaled_ppm * 2^9 / 5^6 */ -#define MV88E6XXX_CC_SHIFT 28 -static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_coeffs = { - .cc_shift = MV88E6XXX_CC_SHIFT, - .cc_mult = 8 << MV88E6XXX_CC_SHIFT, +#define MV88E6XXX_CC_8NS_SHIFT 28 +static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_8ns_coeffs = { + .cc_shift = MV88E6XXX_CC_8NS_SHIFT, + .cc_mult = 8 << MV88E6XXX_CC_8NS_SHIFT, .cc_mult_num = 1 << 9, .cc_mult_dem = 15625ULL }; @@ -96,6 +96,31 @@ static int mv88e6352_set_gpio_func(struct mv88e6xxx_chip *chip, int pin, return chip->info->ops->gpio_ops->set_pctl(chip, pin, func); } +static const struct mv88e6xxx_cc_coeffs * +mv88e6xxx_cc_coeff_get(struct mv88e6xxx_chip *chip) +{ + u16 period_ps; + int err; + + err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_CLOCK_PERIOD, &period_ps, 1); + if (err) { + dev_err(chip->dev, "failed to read cycle counter period: %d\n", + err); + return ERR_PTR(err); + } + + switch (period_ps) { + case 8000: + return &mv88e6xxx_cc_8ns_coeffs; + case 10000: + return &mv88e6xxx_cc_10ns_coeffs; + default: + dev_err(chip->dev, "unexpected cycle counter period of %u ps\n", + period_ps); + return ERR_PTR(-ENODEV); + } +} + static u64 mv88e6352_ptp_clock_read(const struct cyclecounter *cc) { struct mv88e6xxx_chip *chip = cc_to_chip(cc); @@ -213,7 +238,6 @@ out: static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); - const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops; int neg_adj = 0; u32 diff, mult; u64 adj; @@ -223,10 +247,10 @@ static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) scaled_ppm = -scaled_ppm; } - mult = ptp_ops->cc_coeffs->cc_mult; - adj = ptp_ops->cc_coeffs->cc_mult_num; + mult = chip->cc_coeffs->cc_mult; + adj = chip->cc_coeffs->cc_mult_num; adj *= scaled_ppm; - diff = div_u64(adj, ptp_ops->cc_coeffs->cc_mult_dem); + diff = div_u64(adj, chip->cc_coeffs->cc_mult_dem); mv88e6xxx_reg_lock(chip); @@ -373,7 +397,6 @@ const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_coeffs = &mv88e6xxx_cc_coeffs }; const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops = { @@ -397,7 +420,6 @@ const struct mv88e6xxx_ptp_ops mv88e6250_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_coeffs = &mv88e6250_cc_coeffs, }; const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = { @@ -421,7 +443,6 @@ const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_coeffs = &mv88e6xxx_cc_coeffs, }; const struct mv88e6xxx_ptp_ops mv88e6390_ptp_ops = { @@ -446,7 +467,6 @@ const struct mv88e6xxx_ptp_ops mv88e6390_ptp_ops = { (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ), - .cc_coeffs = &mv88e6xxx_cc_coeffs, }; static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc) @@ -481,11 +501,15 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip) int i; /* Set up the cycle counter */ + chip->cc_coeffs = mv88e6xxx_cc_coeff_get(chip); + if (IS_ERR(chip->cc_coeffs)) + return PTR_ERR(chip->cc_coeffs); + memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc)); chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read; chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32); - chip->tstamp_cc.mult = ptp_ops->cc_coeffs->cc_mult; - chip->tstamp_cc.shift = ptp_ops->cc_coeffs->cc_shift; + chip->tstamp_cc.mult = chip->cc_coeffs->cc_mult; + chip->tstamp_cc.shift = chip->cc_coeffs->cc_shift; timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ktime_to_ns(ktime_get_real())); From 773dc610ca6458ee97fe1368b63c6a8794a92da0 Mon Sep 17 00:00:00 2001 From: Shenghao Yang Date: Sun, 20 Oct 2024 14:38:30 +0800 Subject: [PATCH 161/207] net: dsa: mv88e6xxx: support 4000ps cycle counter period [ Upstream commit 3e65ede526cf4f95636dbc835598d100c7668ab3 ] The MV88E6393X family of devices can run its cycle counter off an internal 250MHz clock instead of an external 125MHz one. Add support for this cycle counter period by adding another set of coefficients and lowering the periodic cycle counter read interval to compensate for faster overflows at the increased frequency. Otherwise, the PHC runs at 2x real time in userspace and cannot be synchronized. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family") Signed-off-by: Shenghao Yang Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/ptp.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index a62b4ce7ff61..5980bb4ce43e 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -40,7 +40,7 @@ static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_10ns_coeffs = { .cc_mult_dem = 3125ULL, }; -/* Other families: +/* Other families except MV88E6393X in internal clock mode: * Raw timestamps are in units of 8-ns clock periods. * * clkadj = scaled_ppm * 8*2^28 / (10^6 * 2^16) @@ -55,6 +55,21 @@ static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_8ns_coeffs = { .cc_mult_dem = 15625ULL }; +/* Family MV88E6393X using internal clock: + * Raw timestamps are in units of 4-ns clock periods. + * + * clkadj = scaled_ppm * 4*2^28 / (10^6 * 2^16) + * simplifies to + * clkadj = scaled_ppm * 2^8 / 5^6 + */ +#define MV88E6XXX_CC_4NS_SHIFT 28 +static const struct mv88e6xxx_cc_coeffs mv88e6xxx_cc_4ns_coeffs = { + .cc_shift = MV88E6XXX_CC_4NS_SHIFT, + .cc_mult = 4 << MV88E6XXX_CC_4NS_SHIFT, + .cc_mult_num = 1 << 8, + .cc_mult_dem = 15625ULL +}; + #define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100) #define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc) @@ -110,6 +125,8 @@ mv88e6xxx_cc_coeff_get(struct mv88e6xxx_chip *chip) } switch (period_ps) { + case 4000: + return &mv88e6xxx_cc_4ns_coeffs; case 8000: return &mv88e6xxx_cc_8ns_coeffs; case 10000: @@ -479,10 +496,10 @@ static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc) return 0; } -/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3 +/* With a 250MHz input clock, the 32-bit timestamp counter overflows in ~17.2 * seconds; this task forces periodic reads so that we don't miss any. */ -#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16) +#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 8) static void mv88e6xxx_ptp_overflow_check(struct work_struct *work) { struct delayed_work *dw = to_delayed_work(work); From 5e3583b4c000e62fdd746042705891a69f30cf25 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 1 Oct 2024 22:47:49 +0200 Subject: [PATCH 162/207] ASoC: dt-bindings: davinci-mcasp: Fix interrupts property [ Upstream commit 17d8adc4cd5181c13c1041b197b76efc09eaf8a8 ] My understanding of the interrupts property is that it can either be: 1/ - TX 2/ - TX - RX 3/ - Common/combined. There are very little chances that either: - TX - Common/combined or even - TX - RX - Common/combined could be a thing. Looking at the interrupt-names definition (which uses oneOf instead of anyOf), it makes indeed little sense to use anyOf in the interrupts definition. I believe this is just a mistake, hence let's fix it. Fixes: 8be90641a0bb ("ASoC: dt-bindings: davinci-mcasp: convert McASP bindings to yaml schema") Signed-off-by: Miquel Raynal Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20241001204749.390054-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- .../devicetree/bindings/sound/davinci-mcasp-audio.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml index 7735e08d35ba..ab3206ffa4af 100644 --- a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml +++ b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml @@ -102,7 +102,7 @@ properties: default: 2 interrupts: - anyOf: + oneOf: - minItems: 1 items: - description: TX interrupt From f10cba3f761bbaed5dbce5ad256fea3516fee80d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Oct 2024 10:36:11 +0200 Subject: [PATCH 163/207] ASoC: dt-bindings: davinci-mcasp: Fix interrupt properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8380dbf1b9ef66e3ce6c1d660fd7259637c2a929 ] Combinations of "tx" alone, "rx" alone and "tx", "rx" together are supposedly valid (see link below), which is not the case today as "rx" alone is not accepted by the current binding. Let's rework the two interrupt properties to expose all correct possibilities. Cc: Péter Ujfalusi Link: https://lore.kernel.org/linux-sound/20241003102552.2c11840e@xps-13/T/#m277fce1d49c50d94e071f7890aed472fa2c64052 Fixes: 8be90641a0bb ("ASoC: dt-bindings: davinci-mcasp: convert McASP bindings to yaml schema") Signed-off-by: Miquel Raynal Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20241003083611.461894-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- .../bindings/sound/davinci-mcasp-audio.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml index ab3206ffa4af..beef193aaaeb 100644 --- a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml +++ b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml @@ -102,21 +102,21 @@ properties: default: 2 interrupts: - oneOf: - - minItems: 1 - items: - - description: TX interrupt - - description: RX interrupt - - items: - - description: common/combined interrupt + minItems: 1 + maxItems: 2 interrupt-names: oneOf: - - minItems: 1 + - description: TX interrupt + const: tx + - description: RX interrupt + const: rx + - description: TX and RX interrupts items: - const: tx - const: rx - - const: common + - description: Common/combined interrupt + const: common fck_parent: $ref: /schemas/types.yaml#/definitions/string From 6e4aed4acc1adb11fb800aaf698a18504764ebb7 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Wed, 9 Oct 2024 15:52:27 +0800 Subject: [PATCH 164/207] ASoC: loongson: Fix component check failed on FDT systems [ Upstream commit a6134e7b4d4a14e0942f113a6df1d518baa2a0a4 ] Add missing snd_soc_dai_link.platforms assignment to avoid soc_dai_link_sanity_check() failure. Fixes: d24028606e76 ("ASoC: loongson: Add Loongson ASoC Sound Card Support") Signed-off-by: Binbin Zhou Link: https://patch.msgid.link/6645888f2f9e8a1d8d799109f867d0f97fd78c58.1728459624.git.zhoubinbin@loongson.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/loongson/loongson_card.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/loongson/loongson_card.c b/sound/soc/loongson/loongson_card.c index 8cc54aedd002..010e959d4c69 100644 --- a/sound/soc/loongson/loongson_card.c +++ b/sound/soc/loongson/loongson_card.c @@ -137,6 +137,7 @@ static int loongson_card_parse_of(struct loongson_card_data *data) dev_err(dev, "getting cpu dlc error (%d)\n", ret); goto err; } + loongson_dai_links[i].platforms->of_node = loongson_dai_links[i].cpus->of_node; ret = snd_soc_of_get_dlc(codec, NULL, loongson_dai_links[i].codecs, 0); if (ret < 0) { From 83a420e965f65a5a40646df1e1c071c020bef973 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Oct 2024 19:20:32 +0100 Subject: [PATCH 165/207] ASoC: max98388: Fix missing increment of variable slot_found [ Upstream commit ca2803fadfd239abf155ef4a563b22a9507ee4b2 ] The variable slot_found is being initialized to zero and inside a for-loop is being checked if it's reached MAX_NUM_CH, however, this is currently impossible since slot_found is never changed. In a previous loop a similar coding pattern is used and slot_found is being incremented. It appears the increment of slot_found is missing from the loop, so fix the code by adding in the increment. Fixes: 6a8e1d46f062 ("ASoC: max98388: add amplifier driver") Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20241010182032.776280-1-colin.i.king@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/max98388.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/max98388.c b/sound/soc/codecs/max98388.c index cde5e85946cb..87386404129d 100644 --- a/sound/soc/codecs/max98388.c +++ b/sound/soc/codecs/max98388.c @@ -764,6 +764,7 @@ static int max98388_dai_tdm_slot(struct snd_soc_dai *dai, addr = MAX98388_R2044_PCM_TX_CTRL1 + (cnt / 8); bits = cnt % 8; regmap_update_bits(max98388->regmap, addr, bits, bits); + slot_found++; if (slot_found >= MAX_NUM_CH) break; } From f9ba85f823e75ae952f8b94891832e59fcbf9cac Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 10 Oct 2024 15:14:32 +0100 Subject: [PATCH 166/207] ASoC: rsnd: Fix probe failure on HiHope boards due to endpoint parsing [ Upstream commit 9b064d200aa8fee9d1d7ced05d8a617e45966715 ] On the HiHope boards, we have a single port with a single endpoint defined as below: .... rsnd_port: port { rsnd_endpoint: endpoint { remote-endpoint = <&dw_hdmi0_snd_in>; dai-format = "i2s"; bitclock-master = <&rsnd_endpoint>; frame-master = <&rsnd_endpoint>; playback = <&ssi2>; }; }; .... With commit 547b02f74e4a ("ASoC: rsnd: enable multi Component support for Audio Graph Card/Card2"), support for multiple ports was added. This caused probe failures on HiHope boards, as the endpoint could not be retrieved due to incorrect device node pointers being used. This patch fixes the issue by updating the `rsnd_dai_of_node()` and `rsnd_dai_probe()` functions to use the correct device node pointers based on the port names ('port' or 'ports'). It ensures that the endpoint is properly parsed for both single and multi-port configurations, restoring compatibility with HiHope boards. Fixes: 547b02f74e4a ("ASoC: rsnd: enable multi Component support for Audio Graph Card/Card2") Signed-off-by: Lad Prabhakar Acked-by: Kuninori Morimoto Link: https://patch.msgid.link/20241010141432.716868-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 1bd7114c472a..98c7be340a53 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1297,7 +1297,9 @@ audio_graph: if (!of_node_name_eq(ports, "ports") && !of_node_name_eq(ports, "port")) continue; - priv->component_dais[i] = of_graph_get_endpoint_count(ports); + priv->component_dais[i] = + of_graph_get_endpoint_count(of_node_name_eq(ports, "ports") ? + ports : np); nr += priv->component_dais[i]; i++; if (i >= RSND_MAX_COMPONENT) { @@ -1510,7 +1512,8 @@ static int rsnd_dai_probe(struct rsnd_priv *priv) if (!of_node_name_eq(ports, "ports") && !of_node_name_eq(ports, "port")) continue; - for_each_endpoint_of_node(ports, dai_np) { + for_each_endpoint_of_node(of_node_name_eq(ports, "ports") ? + ports : np, dai_np) { __rsnd_dai_probe(priv, dai_np, dai_np, 0, dai_i); if (rsnd_is_gen3(priv) || rsnd_is_gen4(priv)) { rdai = rsnd_rdai_get(priv, dai_i); From 8b339beb7cfe8e945e45f9384927ed05fb8d180d Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Thu, 17 Oct 2024 16:15:07 +0900 Subject: [PATCH 167/207] ASoC: fsl_micfil: Add a flag to distinguish with different volume control types [ Upstream commit da95e891dd5d5de6c5ebc010bd028a2e028de093 ] On i.MX8MM the register of volume control has positive and negative values. It is different from other platforms like i.MX8MP and i.MX93 which only have positive values. Add a volume_sx flag to use SX_TLV volume control for this kind of platform. Use common TLV volume control for other platforms. Fixes: cdfa92eb90f5 ("ASoC: fsl_micfil: Correct the number of steps on SX controls") Signed-off-by: Chancel Liu Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20241017071507.2577786-1-chancel.liu@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_micfil.c | 43 +++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 0d37edb70261..9407179af5d5 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -67,6 +67,7 @@ struct fsl_micfil_soc_data { bool imx; bool use_edma; bool use_verid; + bool volume_sx; u64 formats; }; @@ -76,6 +77,7 @@ static struct fsl_micfil_soc_data fsl_micfil_imx8mm = { .fifo_depth = 8, .dataline = 0xf, .formats = SNDRV_PCM_FMTBIT_S16_LE, + .volume_sx = true, }; static struct fsl_micfil_soc_data fsl_micfil_imx8mp = { @@ -84,6 +86,7 @@ static struct fsl_micfil_soc_data fsl_micfil_imx8mp = { .fifo_depth = 32, .dataline = 0xf, .formats = SNDRV_PCM_FMTBIT_S32_LE, + .volume_sx = false, }; static struct fsl_micfil_soc_data fsl_micfil_imx93 = { @@ -94,6 +97,7 @@ static struct fsl_micfil_soc_data fsl_micfil_imx93 = { .formats = SNDRV_PCM_FMTBIT_S32_LE, .use_edma = true, .use_verid = true, + .volume_sx = false, }; static const struct of_device_id fsl_micfil_dt_ids[] = { @@ -317,7 +321,26 @@ static int hwvad_detected(struct snd_kcontrol *kcontrol, return 0; } -static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = { +static const struct snd_kcontrol_new fsl_micfil_volume_controls[] = { + SOC_SINGLE_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(0), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(1), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH2 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(2), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH3 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(3), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH4 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(4), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH5 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(5), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH6 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(6), 0xF, 0, gain_tlv), + SOC_SINGLE_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL, + MICFIL_OUTGAIN_CHX_SHIFT(7), 0xF, 0, gain_tlv), +}; + +static const struct snd_kcontrol_new fsl_micfil_volume_sx_controls[] = { SOC_SINGLE_SX_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL, MICFIL_OUTGAIN_CHX_SHIFT(0), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL, @@ -334,6 +357,9 @@ static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = { MICFIL_OUTGAIN_CHX_SHIFT(6), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL, MICFIL_OUTGAIN_CHX_SHIFT(7), 0x8, 0xF, gain_tlv), +}; + +static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = { SOC_ENUM_EXT("MICFIL Quality Select", fsl_micfil_quality_enum, micfil_quality_get, micfil_quality_set), @@ -801,6 +827,20 @@ static int fsl_micfil_dai_probe(struct snd_soc_dai *cpu_dai) return 0; } +static int fsl_micfil_component_probe(struct snd_soc_component *component) +{ + struct fsl_micfil *micfil = snd_soc_component_get_drvdata(component); + + if (micfil->soc->volume_sx) + snd_soc_add_component_controls(component, fsl_micfil_volume_sx_controls, + ARRAY_SIZE(fsl_micfil_volume_sx_controls)); + else + snd_soc_add_component_controls(component, fsl_micfil_volume_controls, + ARRAY_SIZE(fsl_micfil_volume_controls)); + + return 0; +} + static const struct snd_soc_dai_ops fsl_micfil_dai_ops = { .probe = fsl_micfil_dai_probe, .startup = fsl_micfil_startup, @@ -821,6 +861,7 @@ static struct snd_soc_dai_driver fsl_micfil_dai = { static const struct snd_soc_component_driver fsl_micfil_component = { .name = "fsl-micfil-dai", + .probe = fsl_micfil_component_probe, .controls = fsl_micfil_snd_controls, .num_controls = ARRAY_SIZE(fsl_micfil_snd_controls), .legacy_dai_naming = 1, From 4bdc21506f12b2d432b1f2667e5ff4c75eee58e3 Mon Sep 17 00:00:00 2001 From: Andrey Shumilin Date: Fri, 18 Oct 2024 09:00:18 +0300 Subject: [PATCH 168/207] ALSA: firewire-lib: Avoid division by zero in apply_constraint_to_size() [ Upstream commit 72cafe63b35d06b5cfbaf807e90ae657907858da ] The step variable is initialized to zero. It is changed in the loop, but if it's not changed it will remain zero. Add a variable check before the division. The observed behavior was introduced by commit 826b5de90c0b ("ALSA: firewire-lib: fix insufficient PCM rule for period/buffer size"), and it is difficult to show that any of the interval parameters will satisfy the snd_interval_test() condition with data from the amdtp_rate_table[] table. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 826b5de90c0b ("ALSA: firewire-lib: fix insufficient PCM rule for period/buffer size") Signed-off-by: Andrey Shumilin Reviewed-by: Takashi Sakamoto Link: https://patch.msgid.link/20241018060018.1189537-1-shum.sdl@nppct.ru Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/amdtp-stream.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 5f0f8d9c08d1..8c6254ff143e 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -172,6 +172,9 @@ static int apply_constraint_to_size(struct snd_pcm_hw_params *params, step = max(step, amdtp_syt_intervals[i]); } + if (step == 0) + return -EINVAL; + t.min = roundup(s->min, step); t.max = rounddown(s->max, step); t.integer = 1; From 5f0468f30c8f885218a0786876db2f8c3b277e2d Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Fri, 18 Oct 2024 10:12:05 +0800 Subject: [PATCH 169/207] powercap: dtpm_devfreq: Fix error check against dev_pm_qos_add_request() [ Upstream commit 5209d1b654f1db80509040cc694c7814a1b547e3 ] The caller of the function dev_pm_qos_add_request() checks again a non zero value but dev_pm_qos_add_request() can return '1' if the request already exists. Therefore, the setup function fails while the QoS request actually did not failed. Fix that by changing the check against a negative value like all the other callers of the function. Fixes: e44655617317 ("powercap/drivers/dtpm: Add dtpm devfreq with energy model support") Signed-off-by: Yuan Can Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20241018021205.46460-1-yuancan@huawei.com [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/dtpm_devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c index 612c3b59dd5b..0ca53db7a90e 100644 --- a/drivers/powercap/dtpm_devfreq.c +++ b/drivers/powercap/dtpm_devfreq.c @@ -166,7 +166,7 @@ static int __dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent) ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req, DEV_PM_QOS_MAX_FREQUENCY, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); - if (ret) { + if (ret < 0) { pr_err("Failed to add QoS request: %d\n", ret); goto out_dtpm_unregister; } From f965dc0f099a54fca100acf6909abe52d0c85328 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Mon, 21 Oct 2024 16:25:40 +0800 Subject: [PATCH 170/207] nfsd: cancel nfsd_shrinker_work using sync mode in nfs4_state_shutdown_net [ Upstream commit d5ff2fb2e7167e9483846e34148e60c0c016a1f6 ] In the normal case, when we excute `echo 0 > /proc/fs/nfsd/threads`, the function `nfs4_state_destroy_net` in `nfs4_state_shutdown_net` will release all resources related to the hashed `nfs4_client`. If the `nfsd_client_shrinker` is running concurrently, the `expire_client` function will first unhash this client and then destroy it. This can lead to the following warning. Additionally, numerous use-after-free errors may occur as well. nfsd_client_shrinker echo 0 > /proc/fs/nfsd/threads expire_client nfsd_shutdown_net unhash_client ... nfs4_state_shutdown_net /* won't wait shrinker exit */ /* cancel_work(&nn->nfsd_shrinker_work) * nfsd_file for this /* won't destroy unhashed client1 */ * client1 still alive nfs4_state_destroy_net */ nfsd_file_cache_shutdown /* trigger warning */ kmem_cache_destroy(nfsd_file_slab) kmem_cache_destroy(nfsd_file_mark_slab) /* release nfsd_file and mark */ __destroy_client ==================================================================== BUG nfsd_file (Not tainted): Objects remaining in nfsd_file on __kmem_cache_shutdown() -------------------------------------------------------------------- CPU: 4 UID: 0 PID: 764 Comm: sh Not tainted 6.12.0-rc3+ #1 dump_stack_lvl+0x53/0x70 slab_err+0xb0/0xf0 __kmem_cache_shutdown+0x15c/0x310 kmem_cache_destroy+0x66/0x160 nfsd_file_cache_shutdown+0xac/0x210 [nfsd] nfsd_destroy_serv+0x251/0x2a0 [nfsd] nfsd_svc+0x125/0x1e0 [nfsd] write_threads+0x16a/0x2a0 [nfsd] nfsctl_transaction_write+0x74/0xa0 [nfsd] vfs_write+0x1a5/0x6d0 ksys_write+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e ==================================================================== BUG nfsd_file_mark (Tainted: G B W ): Objects remaining nfsd_file_mark on __kmem_cache_shutdown() -------------------------------------------------------------------- dump_stack_lvl+0x53/0x70 slab_err+0xb0/0xf0 __kmem_cache_shutdown+0x15c/0x310 kmem_cache_destroy+0x66/0x160 nfsd_file_cache_shutdown+0xc8/0x210 [nfsd] nfsd_destroy_serv+0x251/0x2a0 [nfsd] nfsd_svc+0x125/0x1e0 [nfsd] write_threads+0x16a/0x2a0 [nfsd] nfsctl_transaction_write+0x74/0xa0 [nfsd] vfs_write+0x1a5/0x6d0 ksys_write+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e To resolve this issue, cancel `nfsd_shrinker_work` using synchronous mode in nfs4_state_shutdown_net. Fixes: 7c24fa225081 ("NFSD: replace delayed_work with work_struct for nfsd_client_shrinker") Signed-off-by: Yang Erkun Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f16bbbfcf672..975dd74a7a4d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8254,7 +8254,7 @@ nfs4_state_shutdown_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); unregister_shrinker(&nn->nfsd_client_shrinker); - cancel_work(&nn->nfsd_shrinker_work); + cancel_work_sync(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); From 66921a4d8e844da6668a28ec0885cd471d88dbe1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 23 Oct 2024 16:13:10 +0800 Subject: [PATCH 171/207] ALSA: hda/realtek: Update default depop procedure [ Upstream commit e3ea2757c312e51bbf62ebc434a6f7df1e3a201f ] Old procedure has a chance to meet Headphone no output. Fixes: c2d6af53a43f ("ALSA: hda/realtek - Add default procedure for suspend and resume state") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/17b717a0a0b04a77aea4a8ec820cba13@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 38 ++++++++++++++++------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 07e1547fff2e..4dc5c7a18d6e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3857,20 +3857,18 @@ static void alc_default_init(struct hda_codec *codec) hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); - if (hp_pin_sense) + if (hp_pin_sense) { msleep(2); - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); - if (hp_pin_sense) - msleep(85); + msleep(75); - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); - - if (hp_pin_sense) - msleep(100); + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + msleep(75); + } } static void alc_default_shutup(struct hda_codec *codec) @@ -3886,22 +3884,20 @@ static void alc_default_shutup(struct hda_codec *codec) hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); - if (hp_pin_sense) + if (hp_pin_sense) { msleep(2); - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - - if (hp_pin_sense) - msleep(85); - - if (!spec->no_shutup_pins) snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - if (hp_pin_sense) - msleep(100); + msleep(75); + if (!spec->no_shutup_pins) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + msleep(75); + } alc_auto_setup_eapd(codec, false); alc_shutup_pins(codec); } From 35dbac8c328d6afe937cd45ecd41d209d0b9f8b8 Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Tue, 22 Oct 2024 15:21:26 -0300 Subject: [PATCH 172/207] smb: client: Handle kstrdup failures for passwords [ Upstream commit 9a5dd61151399ad5a5d69aad28ab164734c1e3bc ] In smb3_reconfigure(), after duplicating ctx->password and ctx->password2 with kstrdup(), we need to check for allocation failures. If ses->password allocation fails, return -ENOMEM. If ses->password2 allocation fails, free ses->password, set it to NULL, and return -ENOMEM. Fixes: c1eb537bf456 ("cifs: allow changing password during remount") Reviewed-by: David Howells Signed-off-by: Henrique Carvalho Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fs_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 3bbac925d076..8d7484400fe8 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -918,8 +918,15 @@ static int smb3_reconfigure(struct fs_context *fc) else { kfree_sensitive(ses->password); ses->password = kstrdup(ctx->password, GFP_KERNEL); + if (!ses->password) + return -ENOMEM; kfree_sensitive(ses->password2); ses->password2 = kstrdup(ctx->password2, GFP_KERNEL); + if (!ses->password2) { + kfree_sensitive(ses->password); + ses->password = NULL; + return -ENOMEM; + } } STEAL_STRING(cifs_sb, ctx, domainname); STEAL_STRING(cifs_sb, ctx, nodename); From 33e89c16cea0882bb05c585fa13236b730dd0efa Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:53 +0100 Subject: [PATCH 173/207] cpufreq/cppc: Move and rename cppc_cpufreq_{perf_to_khz|khz_to_perf}() [ Upstream commit 50b813b147e9eb6546a1fc49d4e703e6d23691f2 ] Move and rename cppc_cpufreq_perf_to_khz() and cppc_cpufreq_khz_to_perf() to use them outside cppc_cpufreq in topology_init_cpu_capacity_cppc(). Modify the interface to use struct cppc_perf_caps *caps instead of struct cppc_cpudata *cpu_data as we only use the fields of cppc_perf_caps. cppc_cpufreq was converting the lowest and nominal freq from MHz to kHz before using them. We move this conversion inside cppc_perf_to_khz and cppc_khz_to_perf to make them generic and usable outside cppc_cpufreq. No functional change Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Tested-by: Pierre Gondois Acked-by: Rafael J. Wysocki Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20231211104855.558096-6-vincent.guittot@linaro.org Stable-dep-of: d93df29bdab1 ("cpufreq: CPPC: fix perf_to_khz/khz_to_perf conversion exception") Signed-off-by: Sasha Levin --- drivers/acpi/cppc_acpi.c | 104 ++++++++++++++++++++++++ drivers/cpufreq/cppc_cpufreq.c | 139 ++++----------------------------- include/acpi/cppc_acpi.h | 2 + 3 files changed, 123 insertions(+), 122 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 7aced0b9bad7..2297404fe471 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include @@ -1858,3 +1861,104 @@ unsigned int cppc_get_transition_latency(int cpu_num) return latency_ns; } EXPORT_SYMBOL_GPL(cppc_get_transition_latency); + +/* Minimum struct length needed for the DMI processor entry we want */ +#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 + +/* Offset in the DMI processor structure for the max frequency */ +#define DMI_PROCESSOR_MAX_SPEED 0x14 + +/* Callback function used to retrieve the max frequency from DMI */ +static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) +{ + const u8 *dmi_data = (const u8 *)dm; + u16 *mhz = (u16 *)private; + + if (dm->type == DMI_ENTRY_PROCESSOR && + dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { + u16 val = (u16)get_unaligned((const u16 *) + (dmi_data + DMI_PROCESSOR_MAX_SPEED)); + *mhz = val > *mhz ? val : *mhz; + } +} + +/* Look up the max frequency in DMI */ +static u64 cppc_get_dmi_max_khz(void) +{ + u16 mhz = 0; + + dmi_walk(cppc_find_dmi_mhz, &mhz); + + /* + * Real stupid fallback value, just in case there is no + * actual value set. + */ + mhz = mhz ? mhz : 1; + + return KHZ_PER_MHZ * mhz; +} + +/* + * If CPPC lowest_freq and nominal_freq registers are exposed then we can + * use them to convert perf to freq and vice versa. The conversion is + * extrapolated as an affine function passing by the 2 points: + * - (Low perf, Low freq) + * - (Nominal perf, Nominal freq) + */ +unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf) +{ + s64 retval, offset = 0; + static u64 max_khz; + u64 mul, div; + + if (caps->lowest_freq && caps->nominal_freq) { + mul = caps->nominal_freq - caps->lowest_freq; + mul *= KHZ_PER_MHZ; + div = caps->nominal_perf - caps->lowest_perf; + offset = caps->nominal_freq * KHZ_PER_MHZ - + div64_u64(caps->nominal_perf * mul, div); + } else { + if (!max_khz) + max_khz = cppc_get_dmi_max_khz(); + mul = max_khz; + div = caps->highest_perf; + } + + retval = offset + div64_u64(perf * mul, div); + if (retval >= 0) + return retval; + return 0; +} +EXPORT_SYMBOL_GPL(cppc_perf_to_khz); + +unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq) +{ + s64 retval, offset = 0; + static u64 max_khz; + u64 mul, div; + + if (caps->lowest_freq && caps->nominal_freq) { + mul = caps->nominal_perf - caps->lowest_perf; + div = caps->nominal_freq - caps->lowest_freq; + /* + * We don't need to convert to kHz for computing offset and can + * directly use nominal_freq and lowest_freq as the div64_u64 + * will remove the frequency unit. + */ + offset = caps->nominal_perf - + div64_u64(caps->nominal_freq * mul, div); + /* But we need it for computing the perf level. */ + div *= KHZ_PER_MHZ; + } else { + if (!max_khz) + max_khz = cppc_get_dmi_max_khz(); + mul = caps->highest_perf; + div = max_khz; + } + + retval = offset + div64_u64(freq * mul, div); + if (retval >= 0) + return retval; + return 0; +} +EXPORT_SYMBOL_GPL(cppc_khz_to_perf); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 1ba3943be8a3..15f1d41920a3 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,12 +26,6 @@ #include -/* Minimum struct length needed for the DMI processor entry we want */ -#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48 - -/* Offset in the DMI processor structure for the max frequency */ -#define DMI_PROCESSOR_MAX_SPEED 0x14 - /* * This list contains information parsed from per CPU ACPI _CPC and _PSD * structures: e.g. the highest and lowest supported performance, capabilities, @@ -291,97 +284,9 @@ static inline void cppc_freq_invariance_exit(void) } #endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ -/* Callback function used to retrieve the max frequency from DMI */ -static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) -{ - const u8 *dmi_data = (const u8 *)dm; - u16 *mhz = (u16 *)private; - - if (dm->type == DMI_ENTRY_PROCESSOR && - dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { - u16 val = (u16)get_unaligned((const u16 *) - (dmi_data + DMI_PROCESSOR_MAX_SPEED)); - *mhz = val > *mhz ? val : *mhz; - } -} - -/* Look up the max frequency in DMI */ -static u64 cppc_get_dmi_max_khz(void) -{ - u16 mhz = 0; - - dmi_walk(cppc_find_dmi_mhz, &mhz); - - /* - * Real stupid fallback value, just in case there is no - * actual value set. - */ - mhz = mhz ? mhz : 1; - - return (1000 * mhz); -} - -/* - * If CPPC lowest_freq and nominal_freq registers are exposed then we can - * use them to convert perf to freq and vice versa. The conversion is - * extrapolated as an affine function passing by the 2 points: - * - (Low perf, Low freq) - * - (Nominal perf, Nominal perf) - */ -static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data, - unsigned int perf) -{ - struct cppc_perf_caps *caps = &cpu_data->perf_caps; - s64 retval, offset = 0; - static u64 max_khz; - u64 mul, div; - - if (caps->lowest_freq && caps->nominal_freq) { - mul = caps->nominal_freq - caps->lowest_freq; - div = caps->nominal_perf - caps->lowest_perf; - offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div); - } else { - if (!max_khz) - max_khz = cppc_get_dmi_max_khz(); - mul = max_khz; - div = caps->highest_perf; - } - - retval = offset + div64_u64(perf * mul, div); - if (retval >= 0) - return retval; - return 0; -} - -static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, - unsigned int freq) -{ - struct cppc_perf_caps *caps = &cpu_data->perf_caps; - s64 retval, offset = 0; - static u64 max_khz; - u64 mul, div; - - if (caps->lowest_freq && caps->nominal_freq) { - mul = caps->nominal_perf - caps->lowest_perf; - div = caps->nominal_freq - caps->lowest_freq; - offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div); - } else { - if (!max_khz) - max_khz = cppc_get_dmi_max_khz(); - mul = caps->highest_perf; - div = max_khz; - } - - retval = offset + div64_u64(freq * mul, div); - if (retval >= 0) - return retval; - return 0; -} - static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) - { struct cppc_cpudata *cpu_data = policy->driver_data; unsigned int cpu = policy->cpu; @@ -389,7 +294,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, u32 desired_perf; int ret = 0; - desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); + desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); /* Return if it is exactly the same perf */ if (desired_perf == cpu_data->perf_ctrls.desired_perf) return ret; @@ -417,7 +322,7 @@ static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy, u32 desired_perf; int ret; - desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); + desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); cpu_data->perf_ctrls.desired_perf = desired_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); @@ -530,7 +435,7 @@ static int cppc_get_cpu_power(struct device *cpu_dev, min_step = min_cap / CPPC_EM_CAP_STEP; max_step = max_cap / CPPC_EM_CAP_STEP; - perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + perf_prev = cppc_khz_to_perf(perf_caps, *KHz); step = perf_prev / perf_step; if (step > max_step) @@ -550,8 +455,8 @@ static int cppc_get_cpu_power(struct device *cpu_dev, perf = step * perf_step; } - *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); - perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + *KHz = cppc_perf_to_khz(perf_caps, perf); + perf_check = cppc_khz_to_perf(perf_caps, *KHz); step_check = perf_check / perf_step; /* @@ -561,8 +466,8 @@ static int cppc_get_cpu_power(struct device *cpu_dev, */ while ((*KHz == prev_freq) || (step_check != step)) { perf++; - *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); - perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + *KHz = cppc_perf_to_khz(perf_caps, perf); + perf_check = cppc_khz_to_perf(perf_caps, *KHz); step_check = perf_check / perf_step; } @@ -591,7 +496,7 @@ static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu_dev->id); - perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz); + perf_prev = cppc_khz_to_perf(perf_caps, KHz); perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; step = perf_prev / perf_step; @@ -679,10 +584,6 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) goto free_mask; } - /* Convert the lowest and nominal freq from MHz to KHz */ - cpu_data->perf_caps.lowest_freq *= 1000; - cpu_data->perf_caps.nominal_freq *= 1000; - list_add(&cpu_data->node, &cpu_data_list); return cpu_data; @@ -724,20 +625,16 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Set min to lowest nonlinear perf to avoid any efficiency penalty (see * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ - policy->min = cppc_cpufreq_perf_to_khz(cpu_data, - caps->lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - caps->nominal_perf); + policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf); + policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance * available if userspace wants to use any perf between lowest & lowest * nonlinear perf */ - policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data, - caps->lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data, - caps->nominal_perf); + policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf); + policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf); policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; @@ -773,7 +670,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) boost_supported = true; /* Set policy->cur to max now. The governors will adjust later. */ - policy->cur = cppc_cpufreq_perf_to_khz(cpu_data, caps->highest_perf); + policy->cur = cppc_perf_to_khz(caps, caps->highest_perf); cpu_data->perf_ctrls.desired_perf = caps->highest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); @@ -868,7 +765,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1); - return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); + return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf); } static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) @@ -883,11 +780,9 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) } if (state) - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - caps->highest_perf); + policy->max = cppc_perf_to_khz(caps, caps->highest_perf); else - policy->max = cppc_cpufreq_perf_to_khz(cpu_data, - caps->nominal_perf); + policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); policy->cpuinfo.max_freq = policy->max; ret = freq_qos_update_request(policy->max_freq_req, policy->max); @@ -947,7 +842,7 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) if (ret < 0) return -EIO; - return cppc_cpufreq_perf_to_khz(cpu_data, desired_perf); + return cppc_perf_to_khz(&cpu_data->perf_caps, desired_perf); } static void cppc_check_hisi_workaround(void) diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index ec425d2834f8..e1720d930666 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -147,6 +147,8 @@ extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); extern bool cppc_perf_ctrs_in_pcc(void); +extern unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf); +extern unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq); extern bool acpi_cpc_valid(void); extern bool cppc_allow_fast_switch(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); From 2f2684c7903e8b5ca256c267e6dff6ea3164a3bc Mon Sep 17 00:00:00 2001 From: liwei Date: Thu, 24 Oct 2024 10:29:52 +0800 Subject: [PATCH 174/207] cpufreq: CPPC: fix perf_to_khz/khz_to_perf conversion exception [ Upstream commit d93df29bdab133b85e94b3c328e7fe26a0ebd56c ] When the nominal_freq recorded by the kernel is equal to the lowest_freq, and the frequency adjustment operation is triggered externally, there is a logic error in cppc_perf_to_khz()/cppc_khz_to_perf(), resulting in perf and khz conversion errors. Fix this by adding a branch processing logic when nominal_freq is equal to lowest_freq. Fixes: ec1c7ad47664 ("cpufreq: CPPC: Fix performance/frequency conversion") Signed-off-by: liwei Acked-by: Viresh Kumar Link: https://patch.msgid.link/20241024022952.2627694-1-liwei728@huawei.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/cppc_acpi.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 2297404fe471..5df417626fd1 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1912,9 +1912,15 @@ unsigned int cppc_perf_to_khz(struct cppc_perf_caps *caps, unsigned int perf) u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { - mul = caps->nominal_freq - caps->lowest_freq; + /* Avoid special case when nominal_freq is equal to lowest_freq */ + if (caps->lowest_freq == caps->nominal_freq) { + mul = caps->nominal_freq; + div = caps->nominal_perf; + } else { + mul = caps->nominal_freq - caps->lowest_freq; + div = caps->nominal_perf - caps->lowest_perf; + } mul *= KHZ_PER_MHZ; - div = caps->nominal_perf - caps->lowest_perf; offset = caps->nominal_freq * KHZ_PER_MHZ - div64_u64(caps->nominal_perf * mul, div); } else { @@ -1935,11 +1941,17 @@ unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int freq) { s64 retval, offset = 0; static u64 max_khz; - u64 mul, div; + u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { - mul = caps->nominal_perf - caps->lowest_perf; - div = caps->nominal_freq - caps->lowest_freq; + /* Avoid special case when nominal_freq is equal to lowest_freq */ + if (caps->lowest_freq == caps->nominal_freq) { + mul = caps->nominal_perf; + div = caps->nominal_freq; + } else { + mul = caps->nominal_perf - caps->lowest_perf; + div = caps->nominal_freq - caps->lowest_freq; + } /* * We don't need to convert to kHz for computing offset and can * directly use nominal_freq and lowest_freq as the div64_u64 From 3521754614cf11ef5fe65c7ade7a9f268c5b4c42 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 22 Oct 2024 17:52:08 +0800 Subject: [PATCH 175/207] btrfs: fix passing 0 to ERR_PTR in btrfs_search_dir_index_item() commit 75f49c3dc7b7423d3734f2e4dabe3dac8d064338 upstream. The ret may be zero in btrfs_search_dir_index_item() and should not passed to ERR_PTR(). Now btrfs_unlink_subvol() is the only caller to this, reconstructed it to check ERR_PTR(-ENOENT) while ret >= 0. This fixes smatch warnings: fs/btrfs/dir-item.c:353 btrfs_search_dir_index_item() warn: passing zero to 'ERR_PTR' Fixes: 9dcbe16fccbb ("btrfs: use btrfs_for_each_slot in btrfs_search_dir_index_item") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Yue Haibing Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dir-item.c | 4 ++-- fs/btrfs/inode.c | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 9c07d5c3e5ad..7066414be6ee 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -347,8 +347,8 @@ btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, return di; } /* Adjust return code if the key was not found in the next leaf. */ - if (ret > 0) - ret = 0; + if (ret >= 0) + ret = -ENOENT; return ERR_PTR(ret); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ee04185d8e0f..ea19ea75674d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4293,11 +4293,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, */ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name); - if (IS_ERR_OR_NULL(di)) { - if (!di) - ret = -ENOENT; - else - ret = PTR_ERR(di); + if (IS_ERR(di)) { + ret = PTR_ERR(di); btrfs_abort_transaction(trans, ret); goto out; } From 004ad1a2d6841bb5765c890df72008c117ced7c7 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 1 Oct 2024 17:03:32 +0900 Subject: [PATCH 176/207] btrfs: zoned: fix zone unusable accounting for freed reserved extent commit bf9821ba4792a0d9a2e72803ae7b4341faf3d532 upstream. When btrfs reserves an extent and does not use it (e.g, by an error), it calls btrfs_free_reserved_extent() to free the reserved extent. In the process, it calls btrfs_add_free_space() and then it accounts the region bytes as block_group->zone_unusable. However, it leaves the space_info->bytes_zone_unusable side not updated. As a result, ENOSPC can happen while a space_info reservation succeeded. The reservation is fine because the freed region is not added in space_info->bytes_zone_unusable, leaving that space as "free". OTOH, corresponding block group counts it as zone_unusable and its allocation pointer is not rewound, we cannot allocate an extent from that block group. That will also negate space_info's async/sync reclaim process, and cause an ENOSPC error from the extent allocation process. Fix that by returning the space to space_info->bytes_zone_unusable. Ideally, since a bio is not submitted for this reserved region, we should return the space to free space and rewind the allocation pointer. But, it needs rework on extent allocation handling, so let it work in this way for now. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 4e999e1c1407..434cf3d5f4cf 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3794,6 +3794,8 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, spin_lock(&cache->lock); if (cache->ro) space_info->bytes_readonly += num_bytes; + else if (btrfs_is_zoned(cache->fs_info)) + space_info->bytes_zone_unusable += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; space_info->max_extent_size = 0; From 975ede2a7bec52b5da1428829b3439667c8a234b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 11 Oct 2024 12:23:15 -0500 Subject: [PATCH 177/207] drm/amd: Guard against bad data for ATIF ACPI method commit bf58f03931fdcf7b3c45cb76ac13244477a60f44 upstream. If a BIOS provides bad data in response to an ATIF method call this causes a NULL pointer dereference in the caller. ``` ? show_regs (arch/x86/kernel/dumpstack.c:478 (discriminator 1)) ? __die (arch/x86/kernel/dumpstack.c:423 arch/x86/kernel/dumpstack.c:434) ? page_fault_oops (arch/x86/mm/fault.c:544 (discriminator 2) arch/x86/mm/fault.c:705 (discriminator 2)) ? do_user_addr_fault (arch/x86/mm/fault.c:440 (discriminator 1) arch/x86/mm/fault.c:1232 (discriminator 1)) ? acpi_ut_update_object_reference (drivers/acpi/acpica/utdelete.c:642) ? exc_page_fault (arch/x86/mm/fault.c:1542) ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623) ? amdgpu_atif_query_backlight_caps.constprop.0 (drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c:387 (discriminator 2)) amdgpu ? amdgpu_atif_query_backlight_caps.constprop.0 (drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c:386 (discriminator 1)) amdgpu ``` It has been encountered on at least one system, so guard for it. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit c9b7c809b89f24e9372a4e7f02d64c950b07fdee) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2bca37044ad0..fac204d6e0ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -147,6 +147,7 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, struct acpi_buffer *params) { acpi_status status; + union acpi_object *obj; union acpi_object atif_arg_elements[2]; struct acpi_object_list atif_arg; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -169,16 +170,24 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, status = acpi_evaluate_object(atif->handle, NULL, &atif_arg, &buffer); + obj = (union acpi_object *)buffer.pointer; - /* Fail only if calling the method fails and ATIF is supported */ + /* Fail if calling the method fails and ATIF is supported */ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n", acpi_format_exception(status)); - kfree(buffer.pointer); + kfree(obj); return NULL; } - return buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER) { + DRM_DEBUG_DRIVER("bad object returned from ATIF: %d\n", + obj->type); + kfree(obj); + return NULL; + } + + return obj; } /** From bdaab141edb6fcd052ee0cd0942f129f35d258c6 Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Thu, 17 Oct 2024 13:16:26 +0200 Subject: [PATCH 178/207] ACPI: resource: Add LG 16T90SP to irq1_level_low_skip_override[] commit 53f1a907d36fb3aa02a4d34073bcec25823a6c74 upstream. The LG Gram Pro 16 2-in-1 (2024) the 16T90SP has its keybopard IRQ (1) described as ActiveLow in the DSDT, which the kernel overrides to EdgeHigh which breaks the keyboard. Add the 16T90SP to the irq1_level_low_skip_override[] quirk table to fix this. Reported-by: Dirk Holten Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219382 Cc: All applicable Suggested-by: Dirk Holten Signed-off-by: Christian Heusel Link: https://patch.msgid.link/20241017-lg-gram-pro-keyboard-v2-1-7c8fbf6ff718@heusel.eu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 95233b413c1a..d3d776d4fb5a 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -498,6 +498,13 @@ static const struct dmi_system_id tongfang_gm_rg[] = { DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"), }, }, + { + /* LG Electronics 16T90SP */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"), + DMI_MATCH(DMI_BOARD_NAME, "16T90SP"), + }, + }, { } }; From 795b080d9aa127215a5baf088a22fa09341a0126 Mon Sep 17 00:00:00 2001 From: Koba Ko Date: Sun, 13 Oct 2024 04:50:10 +0800 Subject: [PATCH 179/207] ACPI: PRM: Find EFI_MEMORY_RUNTIME block for PRM handler and context commit 088984c8d54c0053fc4ae606981291d741c5924b upstream. PRMT needs to find the correct type of block to translate the PA-VA mapping for EFI runtime services. The issue arises because the PRMT is finding a block of type EFI_CONVENTIONAL_MEMORY, which is not appropriate for runtime services as described in Section 2.2.2 (Runtime Services) of the UEFI Specification [1]. Since the PRM handler is a type of runtime service, this causes an exception when the PRM handler is called. [Firmware Bug]: Unable to handle paging request in EFI runtime service WARNING: CPU: 22 PID: 4330 at drivers/firmware/efi/runtime-wrappers.c:341 __efi_queue_work+0x11c/0x170 Call trace: Let PRMT find a block with EFI_MEMORY_RUNTIME for PRM handler and PRM context. If no suitable block is found, a warning message will be printed, but the procedure continues to manage the next PRM handler. However, if the PRM handler is actually called without proper allocation, it would result in a failure during error handling. By using the correct memory types for runtime services, ensure that the PRM handler and the context are properly mapped in the virtual address space during runtime, preventing the paging request error. The issue is really that only memory that has been remapped for runtime by the firmware can be used by the PRM handler, and so the region needs to have the EFI_MEMORY_RUNTIME attribute. Link: https://uefi.org/sites/default/files/resources/UEFI_Spec_2_10_Aug29.pdf # [1] Fixes: cefc7ca46235 ("ACPI: PRM: implement OperationRegion handler for the PlatformRtMechanism subtype") Cc: All applicable Signed-off-by: Koba Ko Reviewed-by: Matthew R. Ochs Reviewed-by: Zhang Rui Reviewed-by: Ard Biesheuvel Link: https://patch.msgid.link/20241012205010.4165798-1-kobak@nvidia.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/prmt.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 7020584096bf..44899234462b 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -72,17 +72,21 @@ struct prm_module_info { struct prm_handler_info handlers[]; }; -static u64 efi_pa_va_lookup(u64 pa) +static u64 efi_pa_va_lookup(efi_guid_t *guid, u64 pa) { efi_memory_desc_t *md; u64 pa_offset = pa & ~PAGE_MASK; u64 page = pa & PAGE_MASK; for_each_efi_memory_desc(md) { - if (md->phys_addr < pa && pa < md->phys_addr + PAGE_SIZE * md->num_pages) + if ((md->attribute & EFI_MEMORY_RUNTIME) && + (md->phys_addr < pa && pa < md->phys_addr + PAGE_SIZE * md->num_pages)) { return pa_offset + md->virt_addr + page - md->phys_addr; + } } + pr_warn("Failed to find VA for GUID: %pUL, PA: 0x%llx", guid, pa); + return 0; } @@ -148,9 +152,15 @@ acpi_parse_prmt(union acpi_subtable_headers *header, const unsigned long end) th = &tm->handlers[cur_handler]; guid_copy(&th->guid, (guid_t *)handler_info->handler_guid); - th->handler_addr = (void *)efi_pa_va_lookup(handler_info->handler_address); - th->static_data_buffer_addr = efi_pa_va_lookup(handler_info->static_data_buffer_address); - th->acpi_param_buffer_addr = efi_pa_va_lookup(handler_info->acpi_param_buffer_address); + th->handler_addr = + (void *)efi_pa_va_lookup(&th->guid, handler_info->handler_address); + + th->static_data_buffer_addr = + efi_pa_va_lookup(&th->guid, handler_info->static_data_buffer_address); + + th->acpi_param_buffer_addr = + efi_pa_va_lookup(&th->guid, handler_info->acpi_param_buffer_address); + } while (++cur_handler < tm->handler_count && (handler_info = get_next_handler(handler_info))); return 0; @@ -253,6 +263,13 @@ static acpi_status acpi_platformrt_space_handler(u32 function, if (!handler || !module) goto invalid_guid; + if (!handler->handler_addr || + !handler->static_data_buffer_addr || + !handler->acpi_param_buffer_addr) { + buffer->prm_status = PRM_HANDLER_ERROR; + return AE_OK; + } + ACPI_COPY_NAMESEG(context.signature, "PRMC"); context.revision = 0x0; context.reserved = 0x0; From a299d415dd3788f7d82e4910dafd65426077e504 Mon Sep 17 00:00:00 2001 From: Shubham Panwar Date: Sun, 20 Oct 2024 15:20:46 +0530 Subject: [PATCH 180/207] ACPI: button: Add DMI quirk for Samsung Galaxy Book2 to fix initial lid detection issue commit 8fa73ee44daefc884c53a25158c25a4107eb5a94 upstream. Add a DMI quirk for Samsung Galaxy Book2 to fix an initial lid state detection issue. The _LID device incorrectly returns the lid status as "closed" during boot, causing the system to enter a suspend loop right after booting. The quirk ensures that the correct lid state is reported initially, preventing the system from immediately suspending after startup. It only addresses the initial lid state detection and ensures proper system behavior upon boot. Signed-off-by: Shubham Panwar Link: https://patch.msgid.link/20241020095045.6036-2-shubiisp8@gmail.com [ rjw: Changelog edits ] Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/button.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 1e76a64cce0a..906a7bfa448b 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -130,6 +130,17 @@ static const struct dmi_system_id dmi_lid_quirks[] = { }, .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, }, + { + /* + * Samsung galaxybook2 ,initial _LID device notification returns + * lid closed. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), + DMI_MATCH(DMI_PRODUCT_NAME, "750XED"), + }, + .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, + }, {} }; From 743c78d455e784097011ea958b27396001181567 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 16 Oct 2024 06:32:07 +0900 Subject: [PATCH 181/207] nilfs2: fix kernel bug due to missing clearing of buffer delay flag commit 6ed469df0bfbef3e4b44fca954a781919db9f7ab upstream. Syzbot reported that after nilfs2 reads a corrupted file system image and degrades to read-only, the BUG_ON check for the buffer delay flag in submit_bh_wbc() may fail, causing a kernel bug. This is because the buffer delay flag is not cleared when clearing the buffer state flags to discard a page/folio or a buffer head. So, fix this. This became necessary when the use of nilfs2's own page clear routine was expanded. This state inconsistency does not occur if the buffer is written normally by log writing. Signed-off-by: Ryusuke Konishi Link: https://lore.kernel.org/r/20241015213300.7114-1-konishi.ryusuke@gmail.com Fixes: 8c26c4e2694a ("nilfs2: fix issue with flush kernel thread after remount in RO mode because of driver's internal error or metadata corruption") Reported-by: syzbot+985ada84bf055a575c07@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=985ada84bf055a575c07 Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/page.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index b4e54d079b7d..36d29c183bb7 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -77,7 +77,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); @@ -410,7 +411,8 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | + BIT(BH_Delay)); bh = head = page_buffers(page); do { From f421a3b18aa78acb3f8433cc357853978f6940a1 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 10 Oct 2024 07:40:36 +1100 Subject: [PATCH 182/207] openat2: explicitly return -E2BIG for (usize > PAGE_SIZE) commit f92f0a1b05698340836229d791b3ffecc71b265a upstream. While we do currently return -EFAULT in this case, it seems prudent to follow the behaviour of other syscalls like clone3. It seems quite unlikely that anyone depends on this error code being EFAULT, but we can always revert this if it turns out to be an issue. Cc: stable@vger.kernel.org # v5.6+ Fixes: fddb5d430ad9 ("open: introduce openat2(2) syscall") Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20241010-extensible-structs-check_fields-v3-3-d2833dfe6edd@cyphar.com Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/open.c b/fs/open.c index 59db720693f9..f9ac703ec1b2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1461,6 +1461,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename, if (unlikely(usize < OPEN_HOW_SIZE_VER0)) return -EINVAL; + if (unlikely(usize > PAGE_SIZE)) + return -E2BIG; err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); if (err) From 2c4adc9b192a0815fe58a62bc0709449416cc884 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 9 Oct 2024 07:08:38 -0700 Subject: [PATCH 183/207] KVM: nSVM: Ignore nCR3[4:0] when loading PDPTEs from memory commit f559b2e9c5c5308850544ab59396b7d53cfc67bd upstream. Ignore nCR3[4:0] when loading PDPTEs from memory for nested SVM, as bits 4:0 of CR3 are ignored when PAE paging is used, and thus VMRUN doesn't enforce 32-byte alignment of nCR3. In the absolute worst case scenario, failure to ignore bits 4:0 can result in an out-of-bounds read, e.g. if the target page is at the end of a memslot, and the VMM isn't using guard pages. Per the APM: The CR3 register points to the base address of the page-directory-pointer table. The page-directory-pointer table is aligned on a 32-byte boundary, with the low 5 address bits 4:0 assumed to be 0. And the SDM's much more explicit: 4:0 Ignored Note, KVM gets this right when loading PDPTRs, it's only the nSVM flow that is broken. Fixes: e4e517b4be01 ("KVM: MMU: Do not unconditionally read PDPTE from guest memory") Reported-by: Kirk Swidowski Cc: Andy Nguyen Cc: 3pvd <3pvd@google.com> Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20241009140838.1036226-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/nested.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 60891b9ce25f..acf22bd99efc 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -63,8 +63,12 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) u64 pdpte; int ret; + /* + * Note, nCR3 is "assumed" to be 32-byte aligned, i.e. the CPU ignores + * nCR3[4:0] when loading PDPTEs from memory. + */ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, - offset_in_page(cr3) + index * 8, 8); + (cr3 & GENMASK(11, 5)) + index * 8, 8); if (ret) return 0; return pdpte; From 04ed2ba07ce73f323052475fbd33d647aca3ff2e Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 16 Oct 2024 19:57:01 -0700 Subject: [PATCH 184/207] KVM: arm64: Fix shift-out-of-bounds bug commit c6c167afa090ea0451f91814e1318755a8fb8bb9 upstream. Fix a shift-out-of-bounds bug reported by UBSAN when running VM with MTE enabled host kernel. UBSAN: shift-out-of-bounds in arch/arm64/kvm/sys_regs.c:1988:14 shift exponent 33 is too large for 32-bit type 'int' CPU: 26 UID: 0 PID: 7629 Comm: qemu-kvm Not tainted 6.12.0-rc2 #34 Hardware name: IEI NF5280R7/Mitchell MB, BIOS 00.00. 2024-10-12 09:28:54 10/14/2024 Call trace: dump_backtrace+0xa0/0x128 show_stack+0x20/0x38 dump_stack_lvl+0x74/0x90 dump_stack+0x18/0x28 __ubsan_handle_shift_out_of_bounds+0xf8/0x1e0 reset_clidr+0x10c/0x1c8 kvm_reset_sys_regs+0x50/0x1c8 kvm_reset_vcpu+0xec/0x2b0 __kvm_vcpu_set_target+0x84/0x158 kvm_vcpu_set_target+0x138/0x168 kvm_arch_vcpu_ioctl_vcpu_init+0x40/0x2b0 kvm_arch_vcpu_ioctl+0x28c/0x4b8 kvm_vcpu_ioctl+0x4bc/0x7a8 __arm64_sys_ioctl+0xb4/0x100 invoke_syscall+0x70/0x100 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x3c/0x158 el0t_64_sync_handler+0x120/0x130 el0t_64_sync+0x194/0x198 Fixes: 7af0c2534f4c ("KVM: arm64: Normalize cache configuration") Cc: stable@vger.kernel.org Reviewed-by: Gavin Shan Signed-off-by: Ilkka Koskinen Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20241017025701.67936-1-ilkka@os.amperecomputing.com Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b233a64df295..370a1a7bd369 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1708,7 +1708,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * one cache line. */ if (kvm_has_mte(vcpu->kvm)) - clidr |= 2 << CLIDR_TTYPE_SHIFT(loc); + clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); __vcpu_sys_reg(vcpu, r->reg) = clidr; From 599eee0e98179fc23db72d65e7117bcd52228a45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Oct 2024 19:36:03 +0100 Subject: [PATCH 185/207] KVM: arm64: Don't eagerly teardown the vgic on init error commit df5fd75ee305cb5927e0b1a0b46cc988ad8db2b1 upstream. As there is very little ordering in the KVM API, userspace can instanciate a half-baked GIC (missing its memory map, for example) at almost any time. This means that, with the right timing, a thread running vcpu-0 can enter the kernel without a GIC configured and get a GIC created behind its back by another thread. Amusingly, it will pick up that GIC and start messing with the data structures without the GIC having been fully initialised. Similarly, a thread running vcpu-1 can enter the kernel, and try to init the GIC that was previously created. Since this GIC isn't properly configured (no memory map), it fails to correctly initialise. And that's the point where we decide to teardown the GIC, freeing all its resources. Behind vcpu-0's back. Things stop pretty abruptly, with a variety of symptoms. Clearly, this isn't good, we should be a bit more careful about this. It is obvious that this guest is not viable, as it is missing some important part of its configuration. So instead of trying to tear bits of it down, let's just mark it as *dead*. It means that any further interaction from userspace will result in -EIO. The memory will be released on the "normal" path, when userspace gives up. Cc: stable@vger.kernel.org Reported-by: Alexander Potapenko Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20241009183603.3221824-1-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/arm.c | 3 +++ arch/arm64/kvm/vgic/vgic-init.c | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 685cc436146a..18413d869cca 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -777,6 +777,9 @@ static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) static int check_vcpu_requests(struct kvm_vcpu *vcpu) { if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) + return -EIO; + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) kvm_vcpu_sleep(vcpu); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index a2b439ad387c..b7306c588d9d 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -494,10 +494,10 @@ int kvm_vgic_map_resources(struct kvm *kvm) out: mutex_unlock(&kvm->arch.config_lock); out_slots: - mutex_unlock(&kvm->slots_lock); - if (ret) - kvm_vgic_destroy(kvm); + kvm_vm_dead(kvm); + + mutex_unlock(&kvm->slots_lock); return ret; } From 60a5ba560f296ad8da153f6ad3f70030bfa3958f Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 23 Jan 2024 19:55:21 -0800 Subject: [PATCH 186/207] x86/lam: Disable ADDRESS_MASKING in most cases commit 3267cb6d3a174ff83d6287dcd5b0047bbd912452 upstream. Linear Address Masking (LAM) has a weakness related to transient execution as described in the SLAM paper[1]. Unless Linear Address Space Separation (LASS) is enabled this weakness may be exploitable. Until kernel adds support for LASS[2], only allow LAM for COMPILE_TEST, or when speculation mitigations have been disabled at compile time, otherwise keep LAM disabled. There are no processors in market that support LAM yet, so currently nobody is affected by this issue. [1] SLAM: https://download.vusec.net/papers/slam_sp24.pdf [2] LASS: https://lore.kernel.org/lkml/20230609183632.48706-1-alexander.shishkin@linux.intel.com/ [ dhansen: update SPECULATION_MITIGATIONS -> CPU_MITIGATIONS ] Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Sohil Mehta Acked-by: Kirill A. Shutemov Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/5373262886f2783f054256babdf5a98545dc986b.1706068222.git.pawan.kumar.gupta%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 82d12c93feab..05c82fd5d0f6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2217,6 +2217,7 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING config ADDRESS_MASKING bool "Linear Address Masking support" depends on X86_64 + depends on COMPILE_TEST || !CPU_MITIGATIONS # wait for LASS help Linear Address Masking (LAM) modifies the checking that is applied to 64-bit linear addresses, allowing software to use of the From 0d674f2e73b47a14c97b0c47cf7b5095a23cfdbe Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 20 Oct 2024 10:56:24 -0700 Subject: [PATCH 187/207] ALSA: hda/tas2781: select CRC32 instead of CRC32_SARWATE commit 86c96e7289c5758284b562ac7b5c94429f48d2d9 upstream. Fix the kconfig option for the tas2781 HDA driver to select CRC32 rather than CRC32_SARWATE. CRC32_SARWATE is an option from the kconfig 'choice' that selects the specific CRC32 implementation. Selecting a 'choice' option seems to have no effect, but even if it did work, it would be incorrect for a random driver to override the user's choice. CRC32 is the correct option to select for crc32() to be available. Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20241020175624.7095-1-ebiggers@kernel.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 9698ebe3fbc2..233b03bb4f98 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -173,7 +173,7 @@ config SND_HDA_SCODEC_TAS2781_I2C depends on SND_SOC select SND_SOC_TAS2781_COMLIB select SND_SOC_TAS2781_FMWLIB - select CRC32_SARWATE + select CRC32 help Say Y or M here to include TAS2781 I2C HD-audio side codec support in snd-hda-intel driver, such as ALC287. From 5d10f6fb2cb7fbc0a5637ebb7fe48d1da595c3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Relvas?= Date: Sun, 20 Oct 2024 11:27:56 +0100 Subject: [PATCH 188/207] ALSA: hda/realtek: Add subwoofer quirk for Acer Predator G9-593 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 35fdc6e1c16099078bcbd73a6c8f1733ae7f1909 upstream. The Acer Predator G9-593 has a 2+1 speaker system which isn't probed correctly. This patch adds a quirk with the proper pin connections. Note that I do not own this laptop, so I cannot guarantee that this fixes the issue. Testing was done by other users here: https://discussion.fedoraproject.org/t/-/118482 This model appears to have two different dev IDs... - 0x1177 (as seen on the forum link above) - 0x1178 (as seen on https://linux-hardware.org/?probe=127df9999f) I don't think the audio system was changed between model revisions, so the patch applies for both IDs. Signed-off-by: José Relvas Link: https://patch.msgid.link/20241020102756.225258-1-josemonsantorelvas@gmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4dc5c7a18d6e..9be5a5c509f0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7358,6 +7358,7 @@ enum { ALC286_FIXUP_ACER_AIO_HEADSET_MIC, ALC256_FIXUP_ASUS_HEADSET_MIC, ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, + ALC255_FIXUP_PREDATOR_SUBWOOFER, ALC299_FIXUP_PREDATOR_SPK, ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, ALC289_FIXUP_DELL_SPK1, @@ -8705,6 +8706,13 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE }, + [ALC255_FIXUP_PREDATOR_SUBWOOFER] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x17, 0x90170151 }, /* use as internal speaker (LFE) */ + { 0x1b, 0x90170152 } /* use as internal speaker (back) */ + } + }, [ALC299_FIXUP_PREDATOR_SPK] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -9678,6 +9686,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1166, "Acer Veriton N4640G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x1167, "Acer Veriton N6640G", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x1025, 0x1177, "Acer Predator G9-593", ALC255_FIXUP_PREDATOR_SUBWOOFER), + SND_PCI_QUIRK(0x1025, 0x1178, "Acer Predator G9-593", ALC255_FIXUP_PREDATOR_SUBWOOFER), SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), From cd2cef7311707a788eeb60cb73cd4b8bd7d767ed Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Oct 2024 22:11:18 +0800 Subject: [PATCH 189/207] LoongArch: Get correct cores_per_package for SMT systems commit b7296f9d5bf99330063d4bbecc43c9b33fed0137 upstream. In loongson_sysconf, The "core" of cores_per_node and cores_per_package stands for a logical core, which means in a SMT system it stands for a thread indeed. This information is gotten from SMBIOS Type4 Structure, so in order to get a correct cores_per_package for both SMT and non-SMT systems in parse_cpu_table() we should use SMBIOS_THREAD_PACKAGE_OFFSET instead of SMBIOS_CORE_PACKAGE_OFFSET. Cc: stable@vger.kernel.org Reported-by: Chao Li Tested-by: Chao Li Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/include/asm/bootinfo.h | 4 ++++ arch/loongarch/kernel/setup.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index c60796869b2b..d2317fa6a766 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -24,6 +24,10 @@ struct loongson_board_info { const char *board_vendor; }; +/* + * The "core" of cores_per_node and cores_per_package stands for a + * logical core, which means in a SMT system it stands for a thread. + */ struct loongson_system_configuration { int nr_cpus; int nr_nodes; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 6748d7f3f221..065f2db57c09 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -55,6 +55,7 @@ #define SMBIOS_FREQHIGH_OFFSET 0x17 #define SMBIOS_FREQLOW_MASK 0xFF #define SMBIOS_CORE_PACKAGE_OFFSET 0x23 +#define SMBIOS_THREAD_PACKAGE_OFFSET 0x25 #define LOONGSON_EFI_ENABLE (1 << 3) #ifdef CONFIG_EFI @@ -129,7 +130,7 @@ static void __init parse_cpu_table(const struct dmi_header *dm) cpu_clock_freq = freq_temp * 1000000; loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]); - loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET); + loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_THREAD_PACKAGE_OFFSET); pr_info("CpuClock = %llu\n", cpu_clock_freq); } From 8915ed160dbd32b5ef5864df9a9fc11db83a77bb Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Oct 2024 22:11:19 +0800 Subject: [PATCH 190/207] LoongArch: Enable IRQ if do_ale() triggered in irq-enabled context commit 69cc6fad5df4ce652d969be69acc60e269e5eea1 upstream. Unaligned access exception can be triggered in irq-enabled context such as user mode, in this case do_ale() may call get_user() which may cause sleep. Then we will get: BUG: sleeping function called from invalid context at arch/loongarch/kernel/access-helper.h:7 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 129, name: modprobe preempt_count: 0, expected: 0 RCU nest depth: 0, expected: 0 CPU: 0 UID: 0 PID: 129 Comm: modprobe Tainted: G W 6.12.0-rc1+ #1723 Tainted: [W]=WARN Stack : 9000000105e0bd48 0000000000000000 9000000003803944 9000000105e08000 9000000105e0bc70 9000000105e0bc78 0000000000000000 0000000000000000 9000000105e0bc78 0000000000000001 9000000185e0ba07 9000000105e0b890 ffffffffffffffff 9000000105e0bc78 73924b81763be05b 9000000100194500 000000000000020c 000000000000000a 0000000000000000 0000000000000003 00000000000023f0 00000000000e1401 00000000072f8000 0000007ffbb0e260 0000000000000000 0000000000000000 9000000005437650 90000000055d5000 0000000000000000 0000000000000003 0000007ffbb0e1f0 0000000000000000 0000005567b00490 0000000000000000 9000000003803964 0000007ffbb0dfec 00000000000000b0 0000000000000007 0000000000000003 0000000000071c1d ... Call Trace: [<9000000003803964>] show_stack+0x64/0x1a0 [<9000000004c57464>] dump_stack_lvl+0x74/0xb0 [<9000000003861ab4>] __might_resched+0x154/0x1a0 [<900000000380c96c>] emulate_load_store_insn+0x6c/0xf60 [<9000000004c58118>] do_ale+0x78/0x180 [<9000000003801bc8>] handle_ale+0x128/0x1e0 So enable IRQ if unaligned access exception is triggered in irq-enabled context to fix it. Cc: stable@vger.kernel.org Reported-by: Binbin Zhou Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/traps.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index aebfc3733a76..d59052c03d9b 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -529,6 +529,9 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) #else unsigned int *pc; + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); /* @@ -553,6 +556,8 @@ sigbus: die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); #endif irqentry_exit(regs, state); } From 36bd0f386b7c2d0ed237105e50d8c2cd139d1bbd Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 23 Oct 2024 22:15:30 +0800 Subject: [PATCH 191/207] LoongArch: Make KASAN usable for variable cpu_vabits commit 3c252263be801f937f56b4bcd8e8e2b5307c1ce5 upstream. Currently, KASAN on LoongArch assume the CPU VA bits is 48, which is true for Loongson-3 series, but not for Loongson-2 series (only 40 or lower), this patch fix that issue and make KASAN usable for variable cpu_vabits. Solution is very simple: Just define XRANGE_SHADOW_SHIFT which means valid address length from VA_BITS to min(cpu_vabits, VA_BITS). Cc: stable@vger.kernel.org Signed-off-by: Kanglong Wang Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/include/asm/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index cd6084f4e153..c6bce5fbff57 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -16,7 +16,7 @@ #define XRANGE_SHIFT (48) /* Valid address length */ -#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define XRANGE_SHADOW_SHIFT min(cpu_vabits, VA_BITS) /* Used for taking out the valid address */ #define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) /* One segment whole address space size */ From c73bca72b84b453c8d26a5e7673b20adb294bf54 Mon Sep 17 00:00:00 2001 From: Petr Vaganov Date: Tue, 8 Oct 2024 14:02:58 +0500 Subject: [PATCH 192/207] xfrm: fix one more kernel-infoleak in algo dumping commit 6889cd2a93e1e3606b3f6e958aa0924e836de4d2 upstream. During fuzz testing, the following issue was discovered: BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x598/0x2a30 _copy_to_iter+0x598/0x2a30 __skb_datagram_iter+0x168/0x1060 skb_copy_datagram_iter+0x5b/0x220 netlink_recvmsg+0x362/0x1700 sock_recvmsg+0x2dc/0x390 __sys_recvfrom+0x381/0x6d0 __x64_sys_recvfrom+0x130/0x200 x64_sys_call+0x32c8/0x3cc0 do_syscall_64+0xd8/0x1c0 entry_SYSCALL_64_after_hwframe+0x79/0x81 Uninit was stored to memory at: copy_to_user_state_extra+0xcc1/0x1e00 dump_one_state+0x28c/0x5f0 xfrm_state_walk+0x548/0x11e0 xfrm_dump_sa+0x1e0/0x840 netlink_dump+0x943/0x1c40 __netlink_dump_start+0x746/0xdb0 xfrm_user_rcv_msg+0x429/0xc00 netlink_rcv_skb+0x613/0x780 xfrm_netlink_rcv+0x77/0xc0 netlink_unicast+0xe90/0x1280 netlink_sendmsg+0x126d/0x1490 __sock_sendmsg+0x332/0x3d0 ____sys_sendmsg+0x863/0xc30 ___sys_sendmsg+0x285/0x3e0 __x64_sys_sendmsg+0x2d6/0x560 x64_sys_call+0x1316/0x3cc0 do_syscall_64+0xd8/0x1c0 entry_SYSCALL_64_after_hwframe+0x79/0x81 Uninit was created at: __kmalloc+0x571/0xd30 attach_auth+0x106/0x3e0 xfrm_add_sa+0x2aa0/0x4230 xfrm_user_rcv_msg+0x832/0xc00 netlink_rcv_skb+0x613/0x780 xfrm_netlink_rcv+0x77/0xc0 netlink_unicast+0xe90/0x1280 netlink_sendmsg+0x126d/0x1490 __sock_sendmsg+0x332/0x3d0 ____sys_sendmsg+0x863/0xc30 ___sys_sendmsg+0x285/0x3e0 __x64_sys_sendmsg+0x2d6/0x560 x64_sys_call+0x1316/0x3cc0 do_syscall_64+0xd8/0x1c0 entry_SYSCALL_64_after_hwframe+0x79/0x81 Bytes 328-379 of 732 are uninitialized Memory access of size 732 starts at ffff88800e18e000 Data copied to user address 00007ff30f48aff0 CPU: 2 PID: 18167 Comm: syz-executor.0 Not tainted 6.8.11 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Fixes copying of xfrm algorithms where some random data of the structure fields can end up in userspace. Padding in structures may be filled with random (possibly sensitve) data and should never be given directly to user-space. A similar issue was resolved in the commit 8222d5910dae ("xfrm: Zero padding when dumping algos and encap") Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: c7a5899eb26e ("xfrm: redact SA secret with lockdown confidentiality") Cc: stable@vger.kernel.org Co-developed-by: Boris Tonofa Signed-off-by: Boris Tonofa Signed-off-by: Petr Vaganov Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 979f23cded40..35b775cf233c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -995,7 +995,9 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) if (!nla) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, auth, sizeof(struct xfrm_algo_auth)); + strscpy_pad(ap->alg_name, auth->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = auth->alg_key_len; + ap->alg_trunc_len = auth->alg_trunc_len; if (redact_secret && auth->alg_key_len) memset(ap->alg_key, 0, (auth->alg_key_len + 7) / 8); else From 62c85b9a0dd7471a362170323e1211ad98ff7b4b Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 18 Oct 2024 11:25:22 -0700 Subject: [PATCH 193/207] hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event commit 4c262801ea60c518b5bebc22a09f5b78b3147da2 upstream. The existing code moves VF to the same namespace as the synthetic NIC during netvsc_register_vf(). But, if the synthetic device is moved to a new namespace after the VF registration, the VF won't be moved together. To make the behavior more consistent, add a namespace check for synthetic NIC's NETDEV_REGISTER event (generated during its move), and move the VF if it is not in the same namespace. Cc: stable@vger.kernel.org Fixes: c0a41b887ce6 ("hv_netvsc: move VF to same namespace as netvsc device") Suggested-by: Stephen Hemminger Signed-off-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://patch.msgid.link/1729275922-17595-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9d2d66a4aafd..8698d2db3dc8 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2795,6 +2795,31 @@ static struct hv_driver netvsc_drv = { }, }; +/* Set VF's namespace same as the synthetic NIC */ +static void netvsc_event_set_vf_ns(struct net_device *ndev) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct net_device *vf_netdev; + int ret; + + vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + if (!vf_netdev) + return; + + if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) { + ret = dev_change_net_namespace(vf_netdev, dev_net(ndev), + "eth%d"); + if (ret) + netdev_err(vf_netdev, + "Cannot move to same namespace as %s: %d\n", + ndev->name, ret); + else + netdev_info(vf_netdev, + "Moved VF to namespace with: %s\n", + ndev->name); + } +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. The synthetic interface is presented first @@ -2807,6 +2832,11 @@ static int netvsc_netdev_event(struct notifier_block *this, struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); int ret = 0; + if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { + netvsc_event_set_vf_ns(event_dev); + return NOTIFY_DONE; + } + ret = check_dev_is_matching_vf(event_dev); if (ret != 0) return NOTIFY_DONE; From c79e0a18e4b301401bb745702830be9041cfbf04 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 5 Feb 2024 15:12:33 -0600 Subject: [PATCH 194/207] drm/amd/display: Disable PSR-SU on Parade 08-01 TCON too commit ba1959f71117b27f3099ee789e0815360b4081dd upstream. Stuart Hayhurst has found that both at bootup and fullscreen VA-API video is leading to black screens for around 1 second and kernel WARNING [1] traces when calling dmub_psr_enable() with Parade 08-01 TCON. These symptoms all go away with PSR-SU disabled for this TCON, so disable it for now while DMUB traces [2] from the failure can be analyzed and the failure state properly root caused. Cc: Marc Rossi Cc: Hamza Mahfooz Link: https://gitlab.freedesktop.org/drm/amd/uploads/a832dd515b571ee171b3e3b566e99a13/dmesg.log [1] Link: https://gitlab.freedesktop.org/drm/amd/uploads/8f13ff3b00963c833e23e68aa8116959/output.log [2] Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2645 Reviewed-by: Leo Li Link: https://lore.kernel.org/r/20240205211233.2601-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit afb634a6823d8d9db23c5fb04f79c5549349628b) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 2b3d5183818a..4887a360ead4 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -841,6 +841,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) isPSRSUSupported = false; else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x01) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } From 4512c448f54c52a6e7089a22cd769bc31ba3a493 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 25 Oct 2024 11:20:21 -0300 Subject: [PATCH 195/207] selinux: improve error checking in sel_write_load() [ Upstream commit 42c773238037c90b3302bf37a57ae3b5c3f6004a ] Move our existing input sanity checking to the top of sel_write_load() and add a check to ensure the buffer size is non-zero. Move a local variable initialization from the declaration to before it is used. Minor style adjustments. Reported-by: Sam Sun Signed-off-by: Paul Moore Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Sasha Levin --- security/selinux/selinuxfs.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 2c23a5a28608..54bc18e8164b 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -582,11 +582,18 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_fs_info *fsi; struct selinux_load_state load_state; ssize_t length; void *data = NULL; + /* no partial writes */ + if (*ppos) + return -EINVAL; + /* no empty policies */ + if (!count) + return -EINVAL; + mutex_lock(&selinux_state.policy_mutex); length = avc_has_perm(current_sid(), SECINITSID_SECURITY, @@ -594,26 +601,22 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (length) goto out; - /* No partial writes. */ - length = -EINVAL; - if (*ppos != 0) - goto out; - - length = -ENOMEM; data = vmalloc(count); - if (!data) + if (!data) { + length = -ENOMEM; goto out; - - length = -EFAULT; - if (copy_from_user(data, buf, count) != 0) + } + if (copy_from_user(data, buf, count) != 0) { + length = -EFAULT; goto out; + } length = security_load_policy(data, count, &load_state); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); goto out; } - + fsi = file_inode(file)->i_sb->s_fs_info; length = sel_make_policy_nodes(fsi, load_state.policy); if (length) { pr_warn_ratelimited("SELinux: failed to initialize selinuxfs\n"); @@ -622,13 +625,12 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, } selinux_policy_commit(&load_state); - length = count; - audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_POLICY_LOAD, "auid=%u ses=%u lsm=selinux res=1", from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); + out: mutex_unlock(&selinux_state.policy_mutex); vfree(data); From ce4a70d9b916cc1dc2887e337282bdc9351b6334 Mon Sep 17 00:00:00 2001 From: Michel Alex Date: Wed, 16 Oct 2024 12:11:15 +0000 Subject: [PATCH 196/207] net: phy: dp83822: Fix reset pin definitions commit de96f6a3003513c796bbe4e23210a446913f5c00 upstream. This change fixes a rare issue where the PHY fails to detect a link due to incorrect reset behavior. The SW_RESET definition was incorrectly assigned to bit 14, which is the Digital Restart bit according to the datasheet. This commit corrects SW_RESET to bit 15 and assigns DIG_RESTART to bit 14 as per the datasheet specifications. The SW_RESET define is only used in the phy_reset function, which fully re-initializes the PHY after the reset is performed. The change in the bit definitions should not have any negative impact on the functionality of the PHY. v2: - added Fixes tag - improved commit message Cc: stable@vger.kernel.org Fixes: 5dc39fd5ef35 ("net: phy: DP83822: Add ability to advertise Fiber connection") Signed-off-by: Alex Michel Reviewed-by: Andrew Lunn Message-ID: Signed-off-by: Andrew Lunn Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83822.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 29e1cbea6dc0..507726a08f82 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -40,8 +40,8 @@ /* Control Register 2 bits */ #define DP83822_FX_ENABLE BIT(14) -#define DP83822_HW_RESET BIT(15) -#define DP83822_SW_RESET BIT(14) +#define DP83822_SW_RESET BIT(15) +#define DP83822_DIG_RESTART BIT(14) /* PHY STS bits */ #define DP83822_PHYSTS_DUPLEX BIT(2) From 24f638125cc3d7b5870444238cded580c552ea31 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 23 Oct 2024 12:55:41 +0200 Subject: [PATCH 197/207] ata: libata: Set DID_TIME_OUT for commands that actually timed out commit 8e59a2a5459fd9840dbe2cbde85fe154b11e1727 upstream. When ata_qc_complete() schedules a command for EH using ata_qc_schedule_eh(), blk_abort_request() will be called, which leads to req->q->mq_ops->timeout() / scsi_timeout() being called. scsi_timeout(), if the LLDD has no abort handler (libata has no abort handler), will set host byte to DID_TIME_OUT, and then call scsi_eh_scmd_add() to add the command to EH. Thus, when commands first enter libata's EH strategy_handler, all the commands that have been added to EH will have DID_TIME_OUT set. Commit e5dd410acb34 ("ata: libata: Clear DID_TIME_OUT for ATA PT commands with sense data") clears this bogus DID_TIME_OUT flag for all commands that reached libata's EH strategy_handler. libata has its own flag (AC_ERR_TIMEOUT), that it sets for commands that have not received a completion at the time of entering EH. ata_eh_worth_retry() has no special handling for AC_ERR_TIMEOUT, so by default timed out commands will get flag ATA_QCFLAG_RETRY set, and will be retried after the port has been reset (ata_eh_link_autopsy() always triggers a port reset if any command has AC_ERR_TIMEOUT set). For a command that has ATA_QCFLAG_RETRY set, while also having an error flag set (e.g. AC_ERR_TIMEOUT), ata_eh_finish() will not increment scmd->allowed, so the command will at most be retried scmd->allowed number of times (which by default is set to 3). However, scsi_eh_flush_done_q() will only retry commands for which scsi_noretry_cmd() returns false. For a command that has DID_TIME_OUT set, while also having either the FAILFAST flag set, or the command being a passthrough command, scsi_noretry_cmd() will return true. Thus, such a command will never be retried. Thus, make sure that libata sets SCSI's DID_TIME_OUT flag for commands that actually timed out (libata's AC_ERR_TIMEOUT flag), such that timed out commands will once again not be retried if they are also a FAILFAST or passthrough command. Cc: stable@vger.kernel.org Fixes: e5dd410acb34 ("ata: libata: Clear DID_TIME_OUT for ATA PT commands with sense data") Reported-by: Lai, Yi Closes: https://lore.kernel.org/linux-ide/ZxYz871I3Blsi30F@ly-workstation/ Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20241023105540.1070012-2-cassel@kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index a96566e1b2b8..9cc022522184 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -636,6 +636,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, /* the scmd has an associated qc */ if (!(qc->flags & ATA_QCFLAG_EH)) { /* which hasn't failed yet, timeout */ + set_host_byte(scmd, DID_TIME_OUT); qc->err_mask |= AC_ERR_TIMEOUT; qc->flags |= ATA_QCFLAG_EH; nr_timedout++; From 73cc3f905ca9aa95694eea3dfa1acadc90686368 Mon Sep 17 00:00:00 2001 From: Zichen Xie Date: Sun, 6 Oct 2024 15:57:37 -0500 Subject: [PATCH 198/207] ASoC: qcom: Fix NULL Dereference in asoc_qcom_lpass_cpu_platform_probe() commit 49da1463c9e3d2082276c3e0e2a8b65a88711cd2 upstream. A devm_kzalloc() in asoc_qcom_lpass_cpu_platform_probe() could possibly return NULL pointer. NULL Pointer Dereference may be triggerred without addtional check. Add a NULL check for the returned pointer. Fixes: b5022a36d28f ("ASoC: qcom: lpass: Use regmap_field for i2sctl and dmactl registers") Cc: stable@vger.kernel.org Signed-off-by: Zichen Xie Link: https://patch.msgid.link/20241006205737.8829-1-zichenxie0106@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/lpass-cpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 73b42d9ee244..e587455dc40a 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -1246,6 +1246,8 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) /* Allocation for i2sctl regmap fields */ drvdata->i2sctl = devm_kzalloc(&pdev->dev, sizeof(struct lpaif_i2sctl), GFP_KERNEL); + if (!drvdata->i2sctl) + return -ENOMEM; /* Initialize bitfields for dai I2SCTL register */ ret = lpass_cpu_init_i2sctl_bitfields(dev, drvdata->i2sctl, From d1e55eeee0562f7a6cc576689f0b57256e67ee5f Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 15 Oct 2024 00:05:29 +0200 Subject: [PATCH 199/207] platform/x86: dell-wmi: Ignore suspend notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a7990957fa53326fe9b47f0349373ed99bb69aaa upstream. Some machines like the Dell G15 5155 emit WMI events when suspending/resuming. Ignore those WMI events. Tested-by: siddharth.manthan@gmail.com Signed-off-by: Armin Wolf Acked-by: Pali Rohár Link: https://lore.kernel.org/r/20241014220529.397390-1-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell/dell-wmi-base.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 502783a7adb1..24fd7ffadda9 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c @@ -264,6 +264,15 @@ static const struct key_entry dell_wmi_keymap_type_0010[] = { /*Speaker Mute*/ { KE_KEY, 0x109, { KEY_MUTE} }, + /* S2Idle screen off */ + { KE_IGNORE, 0x120, { KEY_RESERVED }}, + + /* Leaving S4 or S2Idle suspend */ + { KE_IGNORE, 0x130, { KEY_RESERVED }}, + + /* Entering S2Idle suspend */ + { KE_IGNORE, 0x140, { KEY_RESERVED }}, + /* Mic mute */ { KE_KEY, 0x150, { KEY_MICMUTE } }, From 38ca6fd426eae83959066fe57ce6911a72674b8e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Oct 2024 11:07:15 +0300 Subject: [PATCH 200/207] ACPI: PRM: Clean up guid type in struct prm_handler_info commit 3d1c651272cf1df8aac7d9b6d92d836d27bed50f upstream. Clang 19 prints a warning when we pass &th->guid to efi_pa_va_lookup(): drivers/acpi/prmt.c:156:29: error: passing 1-byte aligned argument to 4-byte aligned parameter 1 of 'efi_pa_va_lookup' may result in an unaligned pointer access [-Werror,-Walign-mismatch] 156 | (void *)efi_pa_va_lookup(&th->guid, handler_info->handler_address); | ^ The problem is that efi_pa_va_lookup() takes a efi_guid_t and &th->guid is a regular guid_t. The difference between the two types is the alignment. efi_guid_t is a typedef. typedef guid_t efi_guid_t __aligned(__alignof__(u32)); It's possible that this a bug in Clang 19. Even though the alignment of &th->guid is not explicitly specified, it will still end up being aligned at 4 or 8 bytes. Anyway, as Ard points out, it's cleaner to change guid to efi_guid_t type and that also makes the warning go away. Fixes: 088984c8d54c ("ACPI: PRM: Find EFI_MEMORY_RUNTIME block for PRM handler and context") Reported-by: Linux Kernel Functional Testing Suggested-by: Ard Biesheuvel Signed-off-by: Dan Carpenter Tested-by: Paul E. McKenney Acked-by: Ard Biesheuvel Link: https://patch.msgid.link/3777d71b-9e19-45f4-be4e-17bf4fa7a834@stanley.mountain [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/prmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 44899234462b..8b391f12853b 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -52,7 +52,7 @@ struct prm_context_buffer { static LIST_HEAD(prm_module_list); struct prm_handler_info { - guid_t guid; + efi_guid_t guid; efi_status_t (__efiapi *handler_addr)(u64, void *); u64 static_data_buffer_addr; u64 acpi_param_buffer_addr; From 1aee34ed99e91ed9c7d7544f420eb004ba344e25 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 19 Dec 2023 04:11:40 -0800 Subject: [PATCH 201/207] RDMA/bnxt_re: Fix the offset for GenP7 adapters for user applications commit 9248f363d0791a548a9c7711365b8be4c70bd375 upstream. User Doorbell page indexes start at an offset for GenP7 adapters. Fix the offset that will be used for user doorbell page indexes. Fixes: a62d68581441 ("RDMA/bnxt_re: Update the BAR offsets") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1702987900-5363-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 607293794b92..0373d0e9db63 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -107,12 +107,14 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) dev_info(rdev_to_dev(rdev), "Couldn't get DB bar size, Low latency framework is disabled\n"); /* set register offsets for both UC and WC */ - if (bnxt_qplib_is_chip_gen_p7(cctx)) + if (bnxt_qplib_is_chip_gen_p7(cctx)) { res->dpi_tbl.ucreg.offset = offset; - else + res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; + } else { res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : BNXT_QPLIB_DBR_PF_DB_OFFSET; - res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + } /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size * is equal to the DB-Bar actual size. This indicates that L2 From 67819f10702b2a160d19651b9538d276615df365 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:33 -0800 Subject: [PATCH 202/207] RDMA/bnxt_re: Avoid creating fence MR for newer adapters commit 282fd66e2ef6e5d72b8fcd77efb2b282d2569464 upstream. Limit the usage of fence MR to adapters older than Gen P5 products. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai Signed-off-by: Bhargava Chenna Marreddy Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f2eaecef7570..9c8224889789 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -400,6 +400,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd) struct bnxt_re_fence_data *fence = &pd->fence; struct ib_mr *ib_mr = &fence->mr->ib_mr; struct bnxt_qplib_swqe *wqe = &fence->bind_wqe; + struct bnxt_re_dev *rdev = pd->rdev; + + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; memset(wqe, 0, sizeof(*wqe)); wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW; @@ -454,6 +458,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd) struct device *dev = &rdev->en_dev->pdev->dev; struct bnxt_re_mr *mr = fence->mr; + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; + if (fence->mw) { bnxt_re_dealloc_mw(fence->mw); fence->mw = NULL; @@ -485,6 +492,9 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct ib_mw *mw; int rc; + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return 0; + dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, DMA_BIDIRECTIONAL); rc = dma_mapping_error(dev, dma_addr); From 042804a9fff166c0eb7874a57a38d1115849b3e3 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:35 -0800 Subject: [PATCH 203/207] RDMA/bnxt_re: Fix unconditional fence for newer adapters commit 8eaca6b5997bd8fd7039f2693e4ecf112823c816 upstream. Older adapters required an unconditional fence for non-wire memory operations. Newer adapters doesn't require this and therefore, disabling the unconditional fence. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 28 ++++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9c8224889789..f20da108fb29 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2563,11 +2563,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey; - /* Need unconditional fence for local invalidate - * opcode to work as expected. - */ - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; - if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_SOLICITED) @@ -2590,12 +2585,6 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, wqe->frmr.levels = qplib_frpl->hwq.level; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; - /* Need unconditional fence for reg_mr - * opcode to function as expected. - */ - - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; - if (wr->wr.send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; @@ -2726,6 +2715,18 @@ bad: return rc; } +static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe) +{ + /* Need unconditional fence for non-wire memory opcode + * to work as expected. + */ + if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV || + wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR || + wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR || + wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW) + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; +} + int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { @@ -2805,8 +2806,11 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, rc = -EINVAL; goto bad; } - if (!rc) + if (!rc) { + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_re_legacy_set_uc_fence(&wqe); rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); + } bad: if (rc) { ibdev_err(&qp->rdev->ibdev, From 81507f633e791b2dbce44b52e67f9210b5b05d6e Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 20 Mar 2024 17:10:38 +0900 Subject: [PATCH 204/207] tracing: probes: Fix to zero initialize a local variable commit 0add699ad068d26e5b1da9ff28b15461fc4005df upstream. Fix to initialize 'val' local variable with zero. Dan reported that Smatch static code checker reports an error that a local 'val' variable needs to be initialized. Actually, the 'val' is expected to be initialized by FETCH_OP_ARG in the same loop, but it is not obvious. So initialize it with zero. Link: https://lore.kernel.org/all/171092223833.237219.17304490075697026697.stgit@devnote2/ Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/b010488e-68aa-407c-add0-3e059254aaa0@moroto.mountain/ Fixes: 25f00e40ce79 ("tracing/probes: Support $argN in return probe (kprobe and fprobe)") Reviewed-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index a1bc49de648f..8c73156a7eb9 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -843,7 +843,7 @@ out: void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs) { struct probe_entry_arg *earg = tp->entry_arg; - unsigned long val; + unsigned long val = 0; int i; if (!earg) From 7a26cb660775067a4d7da6ca48e024a8543bf848 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Jul 2024 12:05:06 -0700 Subject: [PATCH 205/207] task_work: make TWA_NMI_CURRENT handling conditional on IRQ_WORK commit cec6937dd1aae1b38d147bd190cb895d06cf96d0 upstream. The TWA_NMI_CURRENT handling very much depends on IRQ_WORK, but that isn't universally enabled everywhere. Maybe the IRQ_WORK infrastructure should just be unconditional - x86 ends up indirectly enabling it through unconditionally enabling PERF_EVENTS, for example. But it also gets enabled by having SMP support, or even if you just have PRINTK enabled. But in the meantime TWA_NMI_CURRENT causes tons of build failures on various odd minimal configs. Which did show up in linux-next, but despite that nobody bothered to fix it or even inform me until -rc1 was out. Fixes: 466e4d801cd4 ("task_work: Add TWA_NMI_CURRENT as an additional notify mode") Reported-by: Naresh Kamboju Reported-by: kernelci.org bot Reported-by: Guenter Roeck Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/task_work.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/task_work.c b/kernel/task_work.c index 8aa43204cb7d..c969f1f26be5 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -6,12 +6,14 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +#ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); +#endif /** * task_work_add - ask the @task to execute @work->func() @@ -59,6 +61,8 @@ int task_work_add(struct task_struct *task, struct callback_head *work, if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; + if (!IS_ENABLED(CONFIG_IRQ_WORK)) + return -EINVAL; } else { /* * Record the work call stack in order to print it in KASAN @@ -92,9 +96,11 @@ int task_work_add(struct task_struct *task, struct callback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; +#ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; +#endif default: WARN_ON_ONCE(1); break; From 7d9868180bd1e4cf37e7c5067362658971162366 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 1 Oct 2024 18:48:14 +0200 Subject: [PATCH 206/207] xfrm: validate new SA's prefixlen using SA family when sel.family is unset [ Upstream commit 3f0ab59e6537c6a8f9e1b355b48f9c05a76e8563 ] This expands the validation introduced in commit 07bf7908950a ("xfrm: Validate address prefix lengths in the xfrm selector.") syzbot created an SA with usersa.sel.family = AF_UNSPEC usersa.sel.prefixlen_s = 128 usersa.family = AF_INET Because of the AF_UNSPEC selector, verify_newsa_info doesn't put limits on prefixlen_{s,d}. But then copy_from_user_state sets x->sel.family to usersa.family (AF_INET). Do the same conversion in verify_newsa_info before validating prefixlen_{s,d}, since that's how prefixlen is going to be used later on. Reported-by: syzbot+cc39f136925517aed571@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Antony Antony Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 35b775cf233c..1d91b42e7997 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -176,6 +176,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, struct netlink_ext_ack *extack) { int err; + u16 family = p->sel.family; err = -EINVAL; switch (p->family) { @@ -196,7 +197,10 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } - switch (p->sel.family) { + if (!family && !(p->flags & XFRM_STATE_AF_UNSPEC)) + family = p->family; + + switch (family) { case AF_UNSPEC: break; From bf3af7e92bda9f48085b7741e657eeb387a61644 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2024 01:58:34 +0100 Subject: [PATCH 207/207] Linux 6.6.59 Link: https://lore.kernel.org/r/20241028062306.649733554@linuxfoundation.org Tested-by: Miguel Ojeda Tested-by: Takeshi Ogasawara Tested-by: Mark Brown Tested-by: Linux Kernel Functional Testing Tested-by: SeongJae Park Tested-by: Peter Schneider Tested-by: Florian Fainelli Tested-by: Hardik Garg Tested-by: Harshit Mogalapalli Tested-by: kernelci.org bot Tested-by: Ron Economos Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f80e78c7cf20..8a55af189f36 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 58 +SUBLEVEL = 59 EXTRAVERSION = NAME = Pinguïn Aangedreven