From 8333aae9bba1f007b1b3bfdcbc7262ff79d10227 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Mar 2024 10:34:43 +0800 Subject: [PATCH 001/553] scripts/bpf_doc: Use silent mode when exec make cmd [ Upstream commit 5384cc0d1a88c27448a6a4e65b8abe6486de8012 ] When getting kernel version via make, the result may be polluted by other output, like directory change info. e.g. $ export MAKEFLAGS="-w" $ make kernelversion make: Entering directory '/home/net' 6.8.0 make: Leaving directory '/home/net' This will distort the reStructuredText output and make latter rst2man failed like: [...] bpf-helpers.rst:20: (WARNING/2) Field list ends without a blank line; unexpected unindent. [...] Using silent mode would help. e.g. $ make -s --no-print-directory kernelversion 6.8.0 Fixes: fd0a38f9c37d ("scripts/bpf: Set version attribute for bpf-helpers(7) man page") Signed-off-by: Michael Hofmann Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Alejandro Colomar Link: https://lore.kernel.org/bpf/20240315023443.2364442-1-liuhangbin@gmail.com Signed-off-by: Sasha Levin --- scripts/bpf_doc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 4de98b7bbea9..c2da6ed32104 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -396,8 +396,8 @@ class PrinterRST(Printer): version = version.stdout.decode().rstrip() except: try: - version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, - capture_output=True, check=True) + version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], + cwd=linuxRoot, capture_output=True, check=True) version = version.stdout.decode().rstrip() except: return 'Linux' From 0336995512cdab0c65e99e4cdd47c4606debe14e Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 20 Mar 2024 04:15:23 +0500 Subject: [PATCH 002/553] dma-buf: Fix NULL pointer dereference in sanitycheck() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2295bd846765c766701e666ed2e4b35396be25e6 ] If due to a memory allocation failure mock_chain() returns NULL, it is passed to dma_fence_enable_sw_signaling() resulting in NULL pointer dereference there. Call dma_fence_enable_sw_signaling() only if mock_chain() succeeds. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d62c43a953ce ("dma-buf: Enable signaling on fence for selftests") Signed-off-by: Pavel Sakharov Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240319231527.1821372-1-p.sakharov@ispras.ru Signed-off-by: Sasha Levin --- drivers/dma-buf/st-dma-fence-chain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 0a9b099d0518..d90479d830fc 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); From b51ec7fc9f877ef869c01d3ea6f18f6a64e831a7 Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 20 Mar 2024 09:54:10 +0900 Subject: [PATCH 003/553] nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet [ Upstream commit d24b03535e5eb82e025219c2f632b485409c898f ] syzbot reported the following uninit-value access issue [1][2]: nci_rx_work() parses and processes received packet. When the payload length is zero, each message type handler reads uninitialized payload and KMSAN detects this issue. The receipt of a packet with a zero-size payload is considered unexpected, and therefore, such packets should be silently discarded. This patch resolved this issue by checking payload size before calling each message type handler codes. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Reported-and-tested-by: syzbot+7ea9413ea6749baf5574@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+29b5ca705d2e0f4a44d2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7ea9413ea6749baf5574 [1] Closes: https://syzkaller.appspot.com/bug?extid=29b5ca705d2e0f4a44d2 [2] Signed-off-by: Ryosuke Yasuoka Reviewed-by: Jeremy Cline Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/nfc/nci/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index b5071a2f597d..f76a2d806034 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1512,6 +1512,11 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + if (!nci_plen(skb->data)) { + kfree_skb(skb); + break; + } + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: From 57beec623ac5314c5e0e957624c7517d9efdfa59 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 20 Mar 2024 15:31:17 -0400 Subject: [PATCH 004/553] mlxbf_gige: stop PHY during open() error paths [ Upstream commit d6c30c5a168f8586b8bcc0d8e42e2456eb05209b ] The mlxbf_gige_open() routine starts the PHY as part of normal initialization. The mlxbf_gige_open() routine must stop the PHY during its error paths. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Reviewed-by: Andrew Lunn Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 83c4659390fd..113e3d9d3353 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -157,7 +157,7 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_tx_init(priv); if (err) - goto free_irqs; + goto phy_deinit; err = mlxbf_gige_rx_init(priv); if (err) goto tx_deinit; @@ -185,6 +185,9 @@ static int mlxbf_gige_open(struct net_device *netdev) tx_deinit: mlxbf_gige_tx_deinit(priv); +phy_deinit: + phy_stop(phydev); + free_irqs: mlxbf_gige_free_irqs(priv); return err; From 99a75d75007421d8e08ba139e24f77395cd08f62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2024 10:10:17 +0200 Subject: [PATCH 005/553] wifi: iwlwifi: mvm: rfi: fix potential response leaks [ Upstream commit 06a093807eb7b5c5b29b6cff49f8174a4e702341 ] If the rx payload length check fails, or if kmemdup() fails, we still need to free the command response. Fix that. Fixes: 21254908cbe9 ("iwlwifi: mvm: add RFI-M support") Co-authored-by: Anjaneyulu Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240319100755.db2fa0196aa7.I116293b132502ac68a65527330fa37799694b79c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rfi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c index bb77bc9aa821..fb2408c0551d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c @@ -122,14 +122,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) if (ret) return ERR_PTR(ret); - if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) + if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != + resp_size)) { + iwl_free_resp(&cmd); return ERR_PTR(-EIO); + } resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); + iwl_free_resp(&cmd); + if (!resp) return ERR_PTR(-ENOMEM); - iwl_free_resp(&cmd); return resp; } From 231b189fa1a14a8ee8c2c14a81834cea4019fb23 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Tue, 5 Mar 2024 17:02:02 +0100 Subject: [PATCH 006/553] ixgbe: avoid sleeping allocation in ixgbe_ipsec_vf_add_sa() [ Upstream commit aec806fb4afba5fe80b09e29351379a4292baa43 ] Change kzalloc() flags used in ixgbe_ipsec_vf_add_sa() to GFP_ATOMIC, to avoid sleeping in IRQ context. Dan Carpenter, with the help of Smatch, has found following issue: The patch eda0333ac293: "ixgbe: add VF IPsec management" from Aug 13, 2018 (linux-next), leads to the following Smatch static checker warning: drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c:917 ixgbe_ipsec_vf_add_sa() warn: sleeping in IRQ context The call tree that Smatch is worried about is: ixgbe_msix_other() <- IRQ handler -> ixgbe_msg_task() -> ixgbe_rcv_msg_from_vf() -> ixgbe_ipsec_vf_add_sa() Fixes: eda0333ac293 ("ixgbe: add VF IPsec management") Reported-by: Dan Carpenter Link: https://lore.kernel.org/intel-wired-lan/db31a0b0-4d9f-4e6b-aed8-88266eb5665c@moroto.mountain Reviewed-by: Michal Kubiak Signed-off-by: Przemek Kitszel Reviewed-by: Shannon Nelson Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 774de63dd93a..15fc2acffb87 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -908,7 +908,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) goto err_out; } - xs = kzalloc(sizeof(*xs), GFP_KERNEL); + algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); + if (unlikely(!algo)) { + err = -ENOENT; + goto err_out; + } + + xs = kzalloc(sizeof(*xs), GFP_ATOMIC); if (unlikely(!xs)) { err = -ENOMEM; goto err_out; @@ -924,14 +930,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); xs->xso.dev = adapter->netdev; - algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); - if (unlikely(!algo)) { - err = -ENOENT; - goto err_xs; - } - aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; - xs->aead = kzalloc(aead_len, GFP_KERNEL); + xs->aead = kzalloc(aead_len, GFP_ATOMIC); if (unlikely(!xs->aead)) { err = -ENOMEM; goto err_xs; From 16307e7bc1120e87f243d00b8e403da9cfaa60d6 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 21 Mar 2024 12:53:37 +0100 Subject: [PATCH 007/553] s390/qeth: handle deferred cc1 [ Upstream commit afb373ff3f54c9d909efc7f810dc80a9742807b2 ] The IO subsystem expects a driver to retry a ccw_device_start, when the subsequent interrupt response block (irb) contains a deferred condition code 1. Symptoms before this commit: On the read channel we always trigger the next read anyhow, so no different behaviour here. On the write channel we may experience timeout errors, because the expected reply will never be received without the retry. Other callers of qeth_send_control_data() may wrongly assume that the ccw was successful, which may cause problems later. Note that since commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") and commit 5ef1dc40ffa6 ("s390/cio: fix invalid -EBUSY on ccw_device_start") deferred CC1s are much more likely to occur. See the commit message of the latter for more background information. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Signed-off-by: Alexandra Winter Co-developed-by: Thorsten Winkler Signed-off-by: Thorsten Winkler Reviewed-by: Peter Oberparleiter Link: https://lore.kernel.org/r/20240321115337.3564694-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 38 +++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index ae4b6d24bc90..1e6340e2c258 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, } } +/** + * qeth_irq() - qeth interrupt handler + * @cdev: ccw device + * @intparm: expect pointer to iob + * @irb: Interruption Response Block + * + * In the good path: + * corresponding qeth channel is locked with last used iob as active_cmd. + * But this function is also called for error interrupts. + * + * Caller ensures that: + * Interrupts are disabled; ccw device lock is held; + * + */ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { @@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, iob = (struct qeth_cmd_buffer *) (addr_t)intparm; } - qeth_unlock_channel(card, channel); - rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); return; @@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + qeth_unlock_channel(card, channel); if (iob) qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); @@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } } + if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { + /* channel command hasn't started: retry. + * active_cmd is still set to last iob + */ + QETH_CARD_TEXT(card, 2, "irqcc1"); + rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), + (addr_t)iob, 0, 0, iob->timeout); + if (rc) { + QETH_DBF_MESSAGE(2, + "ccw retry on %x failed, rc = %i\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, " err%d", rc); + qeth_unlock_channel(card, channel); + qeth_cancel_cmd(iob, rc); + } + return; + } + + qeth_unlock_channel(card, channel); + if (iob) { /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { From 91b243de910a9ac8476d40238ab3dbfeedd5b7de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2024 13:57:32 +0000 Subject: [PATCH 008/553] tcp: properly terminate timers for kernel sockets [ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ] We had various syzbot reports about tcp timers firing after the corresponding netns has been dismantled. Fortunately Josef Bacik could trigger the issue more often, and could test a patch I wrote two years ago. When TCP sockets are closed, we call inet_csk_clear_xmit_timers() to 'stop' the timers. inet_csk_clear_xmit_timers() can be called from any context, including when socket lock is held. This is the reason it uses sk_stop_timer(), aka del_timer(). This means that ongoing timers might finish much later. For user sockets, this is fine because each running timer holds a reference on the socket, and the user socket holds a reference on the netns. For kernel sockets, we risk that the netns is freed before timer can complete, because kernel sockets do not hold reference on the netns. This patch adds inet_csk_clear_xmit_timers_sync() function that using sk_stop_timer_sync() to make sure all timers are terminated before the kernel socket is released. Modules using kernel sockets close them in their netns exit() handler. Also add sock_not_owned_by_me() helper to get LOCKDEP support : inet_csk_clear_xmit_timers_sync() must not be called while socket lock is held. It is very possible we can revert in the future commit 3a58f13a881e ("net: rds: acquire refcount on TCP sockets") which attempted to solve the issue in rds only. (net/smc/af_smc.c and net/mptcp/subflow.c have similar code) We probably can remove the check_net() tests from tcp_out_of_resources() and __tcp_close() in the future. Reported-by: Josef Bacik Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/ Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/ Signed-off-by: Eric Dumazet Tested-by: Josef Bacik Cc: Tetsuo Handa Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/inet_connection_sock.h | 1 + include/net/sock.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ net/ipv4/tcp.c | 2 ++ 4 files changed, 24 insertions(+) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 080968d6e6c5..8132f330306d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -172,6 +172,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/sock.h b/include/net/sock.h index 579732d47dfc..60577751ea9e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1833,6 +1833,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 79fa19a36bbd..f7832d425382 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -771,6 +771,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. */ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5a165e29f7be..f01c0a5d2c37 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3052,6 +3052,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); From beaf0e7996b79e06ccc2bdcb4442fbaeccc31200 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 22 Mar 2024 15:40:00 +0100 Subject: [PATCH 009/553] net: wwan: t7xx: Split 64bit accesses to fix alignment issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d5a7dd5a35876f0ecc286f3602a88887a788217 ] Some of the registers are aligned on a 32bit boundary, causing alignment faults on 64bit platforms. Unable to handle kernel paging request at virtual address ffffffc084a1d004 Mem abort info: ESR = 0x0000000096000061 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x21: alignment fault Data abort info: ISV = 0, ISS = 0x00000061, ISS2 = 0x00000000 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000046ad6000 [ffffffc084a1d004] pgd=100000013ffff003, p4d=100000013ffff003, pud=100000013ffff003, pmd=0068000020a00711 Internal error: Oops: 0000000096000061 [#1] SMP Modules linked in: mtk_t7xx(+) qcserial pppoe ppp_async option nft_fib_inet nf_flow_table_inet mt7921u(O) mt7921s(O) mt7921e(O) mt7921_common(O) iwlmvm(O) iwldvm(O) usb_wwan rndis_host qmi_wwan pppox ppp_generic nft_reject_ipv6 nft_reject_ipv4 nft_reject_inet nft_reject nft_redir nft_quota nft_numgen nft_nat nft_masq nft_log nft_limit nft_hash nft_flow_offload nft_fib_ipv6 nft_fib_ipv4 nft_fib nft_ct nft_chain_nat nf_tables nf_nat nf_flow_table nf_conntrack mt7996e(O) mt792x_usb(O) mt792x_lib(O) mt7915e(O) mt76_usb(O) mt76_sdio(O) mt76_connac_lib(O) mt76(O) mac80211(O) iwlwifi(O) huawei_cdc_ncm cfg80211(O) cdc_ncm cdc_ether wwan usbserial usbnet slhc sfp rtc_pcf8563 nfnetlink nf_reject_ipv6 nf_reject_ipv4 nf_log_syslog nf_defrag_ipv6 nf_defrag_ipv4 mt6577_auxadc mdio_i2c libcrc32c compat(O) cdc_wdm cdc_acm at24 crypto_safexcel pwm_fan i2c_gpio i2c_smbus industrialio i2c_algo_bit i2c_mux_reg i2c_mux_pca954x i2c_mux_pca9541 i2c_mux_gpio i2c_mux dummy oid_registry tun sha512_arm64 sha1_ce sha1_generic seqiv md5 geniv des_generic libdes cbc authencesn authenc leds_gpio xhci_plat_hcd xhci_pci xhci_mtk_hcd xhci_hcd nvme nvme_core gpio_button_hotplug(O) dm_mirror dm_region_hash dm_log dm_crypt dm_mod dax usbcore usb_common ptp aquantia pps_core mii tpm encrypted_keys trusted CPU: 3 PID: 5266 Comm: kworker/u9:1 Tainted: G O 6.6.22 #0 Hardware name: Bananapi BPI-R4 (DT) Workqueue: md_hk_wq t7xx_fsm_uninit [mtk_t7xx] pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : t7xx_cldma_hw_set_start_addr+0x1c/0x3c [mtk_t7xx] lr : t7xx_cldma_start+0xac/0x13c [mtk_t7xx] sp : ffffffc085d63d30 x29: ffffffc085d63d30 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: ffffff80c804f2c0 x24: ffffff80ca196c05 x23: 0000000000000000 x22: ffffff80c814b9b8 x21: ffffff80c814b128 x20: 0000000000000001 x19: ffffff80c814b080 x18: 0000000000000014 x17: 0000000055c9806b x16: 000000007c5296d0 x15: 000000000f6bca68 x14: 00000000dbdbdce4 x13: 000000001aeaf72a x12: 0000000000000001 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffffff80ca1ef6b4 x7 : ffffff80c814b818 x6 : 0000000000000018 x5 : 0000000000000870 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 000000010a947000 x1 : ffffffc084a1d004 x0 : ffffffc084a1d004 Call trace: t7xx_cldma_hw_set_start_addr+0x1c/0x3c [mtk_t7xx] t7xx_fsm_uninit+0x578/0x5ec [mtk_t7xx] process_one_work+0x154/0x2a0 worker_thread+0x2ac/0x488 kthread+0xe0/0xec ret_from_fork+0x10/0x20 Code: f9400800 91001000 8b214001 d50332bf (f9000022) ---[ end trace 0000000000000000 ]--- The inclusion of io-64-nonatomic-lo-hi.h indicates that all 64bit accesses can be replaced by pairs of nonatomic 32bit access. Fix alignment by forcing all accesses to be 32bit on 64bit platforms. Link: https://forum.openwrt.org/t/fibocom-fm350-gl-support/142682/72 Fixes: 39d439047f1d ("net: wwan: t7xx: Add control DMA interface") Signed-off-by: Bjørn Mork Reviewed-by: Sergey Ryazanov Tested-by: Liviu Dudau Link: https://lore.kernel.org/r/20240322144000.1683822-1-bjorn@mork.no Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wwan/t7xx/t7xx_cldma.c | 4 ++-- drivers/net/wwan/t7xx/t7xx_hif_cldma.c | 9 +++++---- drivers/net/wwan/t7xx/t7xx_pcie_mac.c | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c index 9f43f256db1d..f0a4783baf1f 100644 --- a/drivers/net/wwan/t7xx/t7xx_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_cldma.c @@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) { u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; - return ioread64(hw_info->ap_pdn_base + offset); + return ioread64_lo_hi(hw_info->ap_pdn_base + offset); } void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, @@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; - iowrite64(address, reg + offset); + iowrite64_lo_hi(address, reg + offset); } void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c index 6ff30cb8eb16..5d6032ceb9e5 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c @@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool return -ENODEV; } - gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + + REG_CLDMA_DL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) return 0; @@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; /* Check current processing TGPD, 64-bit address is in a table by Q index */ - ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + - queue->index * sizeof(u64)); + ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + + queue->index * sizeof(u64)); if (req->gpd_addr != ul_curr_addr) { spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c index 76da4c15e3de..f071ec7ff23d 100644 --- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c @@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; - iowrite64(0, reg); + iowrite64_lo_hi(0, reg); } } @@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; iowrite32(cfg->trsl_id, reg); reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); - iowrite64(value, reg); + iowrite64_lo_hi(value, reg); /* Ensure ATR is set */ - ioread64(reg); + ioread64_lo_hi(reg); return 0; } From ebabdae52f2a0ec378baf2c6e3a5f22725ee0b4f Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Fri, 22 Mar 2024 21:07:53 +0300 Subject: [PATCH 010/553] ACPICA: debugger: check status of acpi_evaluate_object() in acpi_db_walk_for_fields() [ Upstream commit 40e2710860e57411ab57a1529c5a2748abbe8a19 ] ACPICA commit 9061cd9aa131205657c811a52a9f8325a040c6c9 Errors in acpi_evaluate_object() can lead to incorrect state of buffer. This can lead to access to data in previously ACPI_FREEd buffer and secondary ACPI_FREE to the same buffer later. Handle errors in acpi_evaluate_object the same way it is done earlier with acpi_ns_handle_to_pathname. Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://github.com/acpica/acpica/commit/9061cd9a Fixes: 5fd033288a86 ("ACPICA: debugger: add command to dump all fields of particular subtype") Signed-off-by: Nikita Kiryushin Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpica/dbnames.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index b91155ea9c34..c9131259f717 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, ACPI_FREE(buffer.pointer); buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); - + status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_os_printf("Could Not evaluate object %p\n", + obj_handle); + return (AE_OK); + } /* * Since this is a field unit, surround the output in braces */ From b8b533eeee5ba53950d7e1358c7fa1614013cdf6 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 25 Mar 2024 20:43:09 +0800 Subject: [PATCH 011/553] net: hns3: fix index limit to support all queue stats [ Upstream commit 47e39d213e09c6cae0d6b4d95e454ea404013312 ] Currently, hns hardware supports more than 512 queues and the index limit in hclge_comm_tqps_update_stats is wrong. So this patch removes it. Fixes: 287db5c40d15 ("net: hns3: create new set of common tqp stats APIs for PF and VF reuse") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Reviewed-by: Michal Kubiak Reviewed-by: Kalesh AP Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index f3c9395d8351..618f66d9586b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, true); - desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); + desc.data[0] = cpu_to_le32(tqp->index); ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, From 50b69054f455dcdb34bd6b22764c7579b270eef3 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 25 Mar 2024 20:43:10 +0800 Subject: [PATCH 012/553] net: hns3: fix kernel crash when devlink reload during pf initialization [ Upstream commit 93305b77ffcb042f1538ecc383505e87d95aa05a ] The devlink reload process will access the hardware resources, but the register operation is done before the hardware is initialized. So, processing the devlink reload during initialization may lead to kernel crash. This patch fixes this by taking devl_lock during initialization. Fixes: b741269b2759 ("net: hns3: add support for registering devlink for PF") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 27037ce79590..9db363fbc34f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11604,6 +11604,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_pci_uninit; + devl_lock(hdev->devlink); + /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) @@ -11778,6 +11780,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + devl_unlock(hdev->devlink); return 0; err_mdiobus_unreg: @@ -11790,6 +11793,7 @@ err_msi_uninit: err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_devlink_uninit: + devl_unlock(hdev->devlink); hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); From 0e111ce740fc705a4cde38df41ecc8e61ba0e5ef Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 25 Mar 2024 20:43:11 +0800 Subject: [PATCH 013/553] net: hns3: mark unexcuted loopback test result as UNEXECUTED [ Upstream commit 5bd088d6c21a45ee70e6116879310e54174d75eb ] Currently, loopback test may be skipped when resetting, but the test result will still show as 'PASS', because the driver doesn't set ETH_TEST_FL_FAILED flag. Fix it by setting the flag and initializating the value to UNEXECUTED. Fixes: 4c8dab1c709c ("net: hns3: reconstruct function hns3_self_test") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Michal Kubiak Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index e22835ae8a94..9fce976a08f0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 +#define HNS3_NIC_LB_TEST_UNEXECUTED 4 + +static int hns3_get_sset_count(struct net_device *netdev, int stringset); static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) { @@ -419,18 +422,26 @@ static void hns3_do_external_lb(struct net_device *ndev, static void hns3_self_test(struct net_device *ndev, struct ethtool_test *eth_test, u64 *data) { + int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; int st_param[HNAE3_LOOP_NONE][2]; bool if_running = netif_running(ndev); + int i; + + /* initialize the loopback test result, avoid marking an unexcuted + * loopback test as PASS. + */ + for (i = 0; i < cnt; i++) + data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; if (hns3_nic_resetting(ndev)) { netdev_err(ndev, "dev resetting!"); - return; + goto failure; } if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) - return; + goto failure; if (netif_msg_ifdown(h)) netdev_info(ndev, "self test start\n"); @@ -452,6 +463,10 @@ static void hns3_self_test(struct net_device *ndev, if (netif_msg_ifdown(h)) netdev_info(ndev, "self test end\n"); + return; + +failure: + eth_test->flags |= ETH_TEST_FL_FAILED; } static void hns3_update_limit_promisc_mode(struct net_device *netdev, From efb4573feaa54bb3cc04db1d2334d18f4a9aa322 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:45 +0100 Subject: [PATCH 014/553] tls: recv: process_rx_list shouldn't use an offset with kvec [ Upstream commit 7608a971fdeb4c3eefa522d1bfe8d4bc6b2481cc ] Only MSG_PEEK needs to copy from an offset during the final process_rx_list call, because the bytes we copied at the beginning of tls_sw_recvmsg were left on the rx_list. In the KVEC case, we removed data from the rx_list as we were copying it, so there's no need to use an offset, just like in the normal case. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/e5487514f828e0347d2b92ca40002c62b58af73d.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bdb5153f3788..e40f6ed65e6a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2238,7 +2238,7 @@ recv_end: } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) + if (is_peek) err = process_rx_list(ctx, msg, &control, copied + peeked, decrypted - peeked, is_peek, NULL); else From f52c8f1210da45d905c9d3b7a3ff4ea02c692fd7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:46 +0100 Subject: [PATCH 015/553] tls: adjust recv return with async crypto and failed copy to userspace [ Upstream commit 85eef9a41d019b59be7bc91793f26251909c0710 ] process_rx_list may not copy as many bytes as we want to the userspace buffer, for example in case we hit an EFAULT during the copy. If this happens, we should only count the bytes that were actually copied, which may be 0. Subtracting async_copy_bytes is correct in both peek and !peek cases, because decrypted == async_copy_bytes + peeked for the peek case: peek is always !ZC, and we can go through either the sync or async path. In the async case, we add chunk to both decrypted and async_copy_bytes. In the sync case, we add chunk to both decrypted and peeked. I missed that in commit 6caaf104423d ("tls: fix peeking with sync+async decryption"). Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1b5a1eaab3c088a9dd5d9f1059ceecd7afe888d1.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e40f6ed65e6a..7166c0606527 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2244,6 +2244,9 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; From 30fabe50a7ace3e9d57cf7f9288f33ea408491c8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2024 16:56:48 +0100 Subject: [PATCH 016/553] tls: get psock ref after taking rxlock to avoid leak [ Upstream commit 417e91e856099e9b8a42a2520e2255e6afe024be ] At the start of tls_sw_recvmsg, we take a reference on the psock, and then call tls_rx_reader_lock. If that fails, we return directly without releasing the reference. Instead of adding a new label, just take the reference after locking has succeeded, since we don't need it before. Fixes: 4cbc325ed6b4 ("tls: rx: allow only one reader at a time") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/fe2ade22d030051ce4c3638704ed58b67d0df643.1711120964.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7166c0606527..348abadbc2d8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2062,10 +2062,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ From 24444af5ddf729376b90db0f135fa19973cb5dab Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 25 Mar 2024 14:36:27 -0400 Subject: [PATCH 017/553] mlxbf_gige: call request_irq() after NAPI initialized [ Upstream commit f7442a634ac06b953fc1f7418f307b25acd4cfbc ] The mlxbf_gige driver encounters a NULL pointer exception in mlxbf_gige_open() when kdump is enabled. The sequence to reproduce the exception is as follows: a) enable kdump b) trigger kdump via "echo c > /proc/sysrq-trigger" c) kdump kernel executes d) kdump kernel loads mlxbf_gige module e) the mlxbf_gige module runs its open() as the the "oob_net0" interface is brought up f) mlxbf_gige module will experience an exception during its open(), something like: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000004 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=00000000e29a4000 [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000086000004 [#1] SMP CPU: 0 PID: 812 Comm: NetworkManager Tainted: G OE 5.15.0-1035-bluefield #37-Ubuntu Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.6.0.13024 Jan 19 2024 pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : __napi_poll+0x40/0x230 sp : ffff800008003e00 x29: ffff800008003e00 x28: 0000000000000000 x27: 00000000ffffffff x26: ffff000066027238 x25: ffff00007cedec00 x24: ffff800008003ec8 x23: 000000000000012c x22: ffff800008003eb7 x21: 0000000000000000 x20: 0000000000000001 x19: ffff000066027238 x18: 0000000000000000 x17: ffff578fcb450000 x16: ffffa870b083c7c0 x15: 0000aaab010441d0 x14: 0000000000000001 x13: 00726f7272655f65 x12: 6769675f6662786c x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa870b0842398 x8 : 0000000000000004 x7 : fe5a48b9069706ea x6 : 17fdb11fc84ae0d2 x5 : d94a82549d594f35 x4 : 0000000000000000 x3 : 0000000000400100 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000066027238 Call trace: 0x0 net_rx_action+0x178/0x360 __do_softirq+0x15c/0x428 __irq_exit_rcu+0xac/0xec irq_exit+0x18/0x2c handle_domain_irq+0x6c/0xa0 gic_handle_irq+0xec/0x1b0 call_on_irq_stack+0x20/0x2c do_interrupt_handler+0x5c/0x70 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x18/0x2c el1h_64_irq+0x7c/0x80 __setup_irq+0x4c0/0x950 request_threaded_irq+0xf4/0x1bc mlxbf_gige_request_irqs+0x68/0x110 [mlxbf_gige] mlxbf_gige_open+0x5c/0x170 [mlxbf_gige] __dev_open+0x100/0x220 __dev_change_flags+0x16c/0x1f0 dev_change_flags+0x2c/0x70 do_setlink+0x220/0xa40 __rtnl_newlink+0x56c/0x8a0 rtnl_newlink+0x58/0x84 rtnetlink_rcv_msg+0x138/0x3c4 netlink_rcv_skb+0x64/0x130 rtnetlink_rcv+0x20/0x30 netlink_unicast+0x2ec/0x360 netlink_sendmsg+0x278/0x490 __sock_sendmsg+0x5c/0x6c ____sys_sendmsg+0x290/0x2d4 ___sys_sendmsg+0x84/0xd0 __sys_sendmsg+0x70/0xd0 __arm64_sys_sendmsg+0x2c/0x40 invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x54/0x184 do_el0_svc+0x30/0xac el0_svc+0x48/0x160 el0t_64_sync_handler+0xa4/0x12c el0t_64_sync+0x1a4/0x1a8 Code: bad PC value ---[ end trace 7d1c3f3bf9d81885 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt Kernel Offset: 0x2870a7a00000 from 0xffff800008000000 PHYS_OFFSET: 0x80000000 CPU features: 0x0,000005c1,a3332a5a Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- The exception happens because there is a pending RX interrupt before the call to request_irq(RX IRQ) executes. Then, the RX IRQ handler fires immediately after this request_irq() completes. The RX IRQ handler runs "napi_schedule()" before NAPI is fully initialized via "netif_napi_add()" and "napi_enable()", both which happen later in the open() logic. The logic in mlxbf_gige_open() must fully initialize NAPI before any calls to request_irq() execute. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Link: https://lore.kernel.org/r/20240325183627.7641-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlxbf_gige/mlxbf_gige_main.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 113e3d9d3353..65e92541db6e 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -139,13 +139,10 @@ static int mlxbf_gige_open(struct net_device *netdev) control |= MLXBF_GIGE_CONTROL_PORT_EN; writeq(control, priv->base + MLXBF_GIGE_CONTROL); - err = mlxbf_gige_request_irqs(priv); - if (err) - return err; mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); if (err) - goto free_irqs; + return err; /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -166,6 +163,10 @@ static int mlxbf_gige_open(struct net_device *netdev) napi_enable(&priv->napi); netif_start_queue(netdev); + err = mlxbf_gige_request_irqs(priv); + if (err) + goto napi_deinit; + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -182,14 +183,17 @@ static int mlxbf_gige_open(struct net_device *netdev) return 0; +napi_deinit: + netif_stop_queue(netdev); + napi_disable(&priv->napi); + netif_napi_del(&priv->napi); + mlxbf_gige_rx_deinit(priv); + tx_deinit: mlxbf_gige_tx_deinit(priv); phy_deinit: phy_stop(phydev); - -free_irqs: - mlxbf_gige_free_irqs(priv); return err; } From 98cdac206b112bec63852e94802791e316acc2c1 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Tue, 26 Mar 2024 22:42:45 -0400 Subject: [PATCH 018/553] bpf: Protect against int overflow for stack access size [ Upstream commit ecc6a2101840177e57c925c102d2d29f260d37c8 ] This patch re-introduces protection against the size of access to stack memory being negative; the access size can appear negative as a result of overflowing its signed int representation. This should not actually happen, as there are other protections along the way, but we should protect against it anyway. One code path was missing such protections (fixed in the previous patch in the series), causing out-of-bounds array accesses in check_stack_range_initialized(). This patch causes the verification of a program with such a non-sensical access size to fail. This check used to exist in a more indirect way, but was inadvertendly removed in a833a17aeac7. Fixes: a833a17aeac7 ("bpf: Fix verification of indirect var-off stack access") Reported-by: syzbot+33f4297b5f927648741a@syzkaller.appspotmail.com Reported-by: syzbot+aafd0513053a1cbf52ef@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAADnVQLORV5PT0iTAhRER+iLBTkByCYNBYyvBSgjN1T31K+gOw@mail.gmail.com/ Acked-by: Andrii Nakryiko Signed-off-by: Andrei Matei Link: https://lore.kernel.org/r/20240327024245.318299-3-andreimatei1@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1a29ac4db6ea..27cc6e3db5a8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4965,6 +4965,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? */ if (err) { if (tnum_is_const(reg->var_off)) { From 852698c9fd5b177202ef6683565460d807f93ca7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2024 14:13:24 +0000 Subject: [PATCH 019/553] cifs: Fix duplicate fscache cookie warnings [ Upstream commit 8876a37277cb832e1861c35f8c661825179f73f5 ] fscache emits a lot of duplicate cookie warnings with cifs because the index key for the fscache cookies does not include everything that the cifs_find_inode() function does. The latter is used with iget5_locked() to distinguish between inodes in the local inode cache. Fix this by adding the creation time and file type to the fscache cookie key. Additionally, add a couple of comments to note that if one is changed the other must be also. Signed-off-by: David Howells Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fscache.c | 16 +++++++++++++++- fs/smb/client/inode.c | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index f64bad513ba6..6df4ab2a6e5d 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -12,6 +12,16 @@ #include "cifs_fs_sb.h" #include "cifsproto.h" +/* + * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). + */ +struct cifs_fscache_inode_key { + + __le64 uniqueid; /* server inode number */ + __le64 createtime; /* creation time on server */ + u8 type; /* S_IFMT file type */ +} __packed; + static void cifs_fscache_fill_volume_coherency( struct cifs_tcon *tcon, struct cifs_fscache_volume_coherency_data *cd) @@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) void cifs_fscache_get_inode_cookie(struct inode *inode) { struct cifs_fscache_inode_coherency_data cd; + struct cifs_fscache_inode_key key; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + key.uniqueid = cpu_to_le64(cifsi->uniqueid); + key.createtime = cpu_to_le64(cifsi->createtime); + key.type = (inode->i_mode & S_IFMT) >> 12; cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); cifsi->netfs.cache = fscache_acquire_cookie(tcon->fscache, 0, - &cifsi->uniqueid, sizeof(cifsi->uniqueid), + &key, sizeof(key), &cd, sizeof(cd), i_size_read(&cifsi->netfs.inode)); if (cifsi->netfs.cache) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 5343898bac8a..634f28f0d331 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1274,6 +1274,8 @@ cifs_find_inode(struct inode *inode, void *opaque) { struct cifs_fattr *fattr = opaque; + /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */ + /* don't match inode with different uniqueid */ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; From 2553bfaa19b4219a8ad0ebbe785bc000cebf8274 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Tue, 26 Mar 2024 12:28:05 +0530 Subject: [PATCH 020/553] net: lan743x: Add set RFE read fifo threshold for PCI1x1x chips [ Upstream commit e4a58989f5c839316ac63675e8800b9eed7dbe96 ] PCI11x1x Rev B0 devices might drop packets when receiving back to back frames at 2.5G link speed. Change the B0 Rev device's Receive filtering Engine FIFO threshold parameter from its hardware default of 4 to 3 dwords to prevent the problem. Rev C0 and later hardware already defaults to 3 dwords. Fixes: bb4f6bffe33c ("net: lan743x: Add PCI11010 / PCI11414 device IDs") Signed-off-by: Raju Lakkaraju Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326065805.686128-1-Raju.Lakkaraju@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/microchip/lan743x_main.h | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e804613faa1f..d5123e8c4a9f 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -25,6 +25,8 @@ #define PCS_POWER_STATE_DOWN 0x6 #define PCS_POWER_STATE_UP 0x4 +#define RFE_RD_FIFO_TH_3_DWORDS 0x3 + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; @@ -3217,6 +3219,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) lan743x_pci_cleanup(adapter); } +static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) +{ + u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; + + if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { + u32 misc_ctl; + + misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); + misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; + misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, + RFE_RD_FIFO_TH_3_DWORDS); + lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); + } +} + static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { @@ -3232,6 +3249,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, pci11x1x_strap_get_status(adapter); spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); + pci11x1x_set_rfe_rd_fifo_threshold(adapter); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 67877d3b6dd9..d304be17b9d8 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -26,6 +26,7 @@ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) +#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) #define FPGA_REV (0x04) #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) @@ -311,6 +312,9 @@ #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) +#define MISC_CTL_0 (0x920) +#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) + /* Vendor Specific SGMII MMD details */ #define SR_VSMMD_PCS_ID1 0x0004 #define SR_VSMMD_PCS_ID2 0x0005 From 06426737653357e65661b6fb039f535c677fa306 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 26 Mar 2024 10:57:20 +0530 Subject: [PATCH 021/553] Octeontx2-af: fix pause frame configuration in GMP mode [ Upstream commit 40d4b4807cadd83fb3f46cc8cd67a945b5b25461 ] The Octeontx2 MAC block (CGX) has separate data paths (SMU and GMP) for different speeds, allowing for efficient data transfer. The previous patch which added pause frame configuration has a bug due to which pause frame feature is not working in GMP mode. This patch fixes the issue by configurating appropriate registers. Fixes: f7e086e754fe ("octeontx2-af: Pause frame configuration at cgx") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326052720.4441-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index e6fe599f7bf3..254cad45a555 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -814,6 +814,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; From 7d0567842b78390dd9b60f00f1d8f838d540e325 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Mar 2024 11:18:41 +0100 Subject: [PATCH 022/553] inet: inet_defrag: prevent sk release while still in use [ Upstream commit 18685451fc4e546fc0e718580d32df3c0e5c8272 ] ip_local_out() and other functions can pass skb->sk as function argument. If the skb is a fragment and reassembly happens before such function call returns, the sk must not be released. This affects skb fragments reassembled via netfilter or similar modules, e.g. openvswitch or ct_act.c, when run as part of tx pipeline. Eric Dumazet made an initial analysis of this bug. Quoting Eric: Calling ip_defrag() in output path is also implying skb_orphan(), which is buggy because output path relies on sk not disappearing. A relevant old patch about the issue was : 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") [..] net/ipv4/ip_output.c depends on skb->sk being set, and probably to an inet socket, not an arbitrary one. If we orphan the packet in ipvlan, then downstream things like FQ packet scheduler will not work properly. We need to change ip_defrag() to only use skb_orphan() when really needed, ie whenever frag_list is going to be used. Eric suggested to stash sk in fragment queue and made an initial patch. However there is a problem with this: If skb is refragmented again right after, ip_do_fragment() will copy head->sk to the new fragments, and sets up destructor to sock_wfree. IOW, we have no choice but to fix up sk_wmem accouting to reflect the fully reassembled skb, else wmem will underflow. This change moves the orphan down into the core, to last possible moment. As ip_defrag_offset is aliased with sk_buff->sk member, we must move the offset into the FRAG_CB, else skb->sk gets clobbered. This allows to delay the orphaning long enough to learn if the skb has to be queued or if the skb is completing the reasm queue. In the former case, things work as before, skb is orphaned. This is safe because skb gets queued/stolen and won't continue past reasm engine. In the latter case, we will steal the skb->sk reference, reattach it to the head skb, and fix up wmem accouting when inet_frag inflates truesize. Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") Diagnosed-by: Eric Dumazet Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 7 +-- net/ipv4/inet_fragment.c | 70 ++++++++++++++++++++----- net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c30d419ebf54..c4a8520dc748 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -745,8 +745,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -870,10 +868,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9f9ac5013a7..834cdc57755f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -390,12 +393,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -412,13 +415,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -432,7 +435,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -442,13 +445,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -457,6 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -464,13 +488,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -486,7 +510,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -503,6 +527,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -510,6 +549,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -573,6 +614,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fb153569889e..6c309c1ec3b0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -378,6 +378,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -480,7 +481,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 38db0064d661..87a394179092 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -293,6 +293,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -468,7 +469,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { From 58638e3b48796fae3c5a6a47167c83c471e8a8dd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 Mar 2024 15:30:39 +0100 Subject: [PATCH 023/553] dm integrity: fix out-of-range warning [ Upstream commit 8e91c2342351e0f5ef6c0a704384a7f6fc70c3b2 ] Depending on the value of CONFIG_HZ, clang complains about a pointless comparison: drivers/md/dm-integrity.c:4085:12: error: result of comparison of constant 42949672950 with expression of type 'unsigned int' is always false [-Werror,-Wtautological-constant-out-of-range-compare] if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { As the check remains useful for other configurations, shut up the warning by adding a second type cast to uint64_t. Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode") Signed-off-by: Arnd Bergmann Reviewed-by: Mikulas Patocka Reviewed-by: Justin Stitt Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 9c9e2b50c63c..696365f8f3b5 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4167,7 +4167,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; From 8d029111b809b59a8ba0510330d497481fc28991 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:01:44 +0530 Subject: [PATCH 024/553] x86/cpufeatures: Add new word for scattered features [ Upstream commit 7f274e609f3d5f45c22b1dd59053f6764458b492 ] Add a new word for scattered features because all free bits among the existing Linux-defined auxiliary flags have been exhausted. Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/8380d2a0da469a1f0ad75b8954a79fb689599ff6.1711091584.git.sandipan.das@amd.com Stable-dep-of: 598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability") Signed-off-by: Sasha Levin --- arch/x86/include/asm/cpufeature.h | 6 ++++-- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f835b328ba24..578c3020be7b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -96,8 +96,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -121,8 +122,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9a157942ae3d..09cca23f020e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 000037078db4..380e963149cc 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -112,6 +112,7 @@ #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ From ad141b08d1ce078ad52a00e2eed1589208c54586 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 25 Mar 2024 13:01:45 +0530 Subject: [PATCH 025/553] perf/x86/amd/lbr: Use freeze based on availability [ Upstream commit 598c2fafc06fe5c56a1a415fb7b544b31453d637 ] Currently, the LBR code assumes that LBR Freeze is supported on all processors when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX] bit 1 is set. This is incorrect as the availability of the feature is additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set, which may not be set for all Zen 4 processors. Define a new feature bit for LBR and PMC freeze and set the freeze enable bit (FLBRI) in DebugCtl (MSR 0x1d9) conditionally. It should still be possible to use LBR without freeze for profile-guided optimization of user programs by using an user-only branch filter during profiling. When the user-only filter is enabled, branches are no longer recorded after the transition to CPL 0 upon PMI arrival. When branch entries are read in the PMI handler, the branch stack does not change. E.g. $ perf record -j any,u -e ex_ret_brn_tkn ./workload Since the feature bit is visible under flags in /proc/cpuinfo, it can be used to determine the feasibility of use-cases which require LBR Freeze to be supported by the hardware such as profile-guided optimization of kernels. Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.1711091584.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/core.c | 4 ++-- arch/x86/events/amd/lbr.c | 16 ++++++++++------ arch/x86/include/asm/cpufeatures.h | 8 ++++++++ arch/x86/kernel/cpu/scattered.c | 1 + 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index fd091b9dd706..3ac069a4559b 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 38a75216c12c..b8fe74e8e0a6 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 09cca23f020e..1280daa72975 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -432,6 +432,14 @@ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ + /* * BUG word(s) */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index fc01f81f6e2a..94e0a42528dc 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -46,6 +46,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; From 923579201dece7cb9fc30662b4f6e7d7801042a8 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 5 Mar 2024 18:48:39 +0000 Subject: [PATCH 026/553] KVM: arm64: Fix host-programmed guest events in nVHE commit e89c928bedd77d181edc2df01cb6672184775140 upstream. Programming PMU events in the host that count during guest execution is a feature supported by perf, e.g. perf stat -e cpu_cycles:G ./lkvm run While this works for VHE, the guest/host event bitmaps are not carried through to the hypervisor in the nVHE configuration. Make kvm_pmu_update_vcpu_events() conditional on whether or not _hardware_ supports PMUv3 rather than if the vCPU as vPMU enabled. Cc: stable@vger.kernel.org Fixes: 84d751a019a9 ("KVM: arm64: Pass pmu events to hyp via vcpu") Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240305184840.636212-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Greg Kroah-Hartman --- include/kvm/arm_pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 96b192139a23..6196b71c5eb5 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -85,7 +85,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); */ #define kvm_pmu_update_vcpu_events(vcpu) \ do { \ - if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ + if (!has_vhe() && kvm_arm_support_pmu_v3()) \ vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) From 3d61f1704bdf2b4335336590baac0e755b06fd71 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 30 Mar 2024 12:49:02 +0100 Subject: [PATCH 027/553] r8169: fix issue caused by buggy BIOS on certain boards with RTL8168d commit 5d872c9f46bd2ea3524af3c2420a364a13667135 upstream. On some boards with this chip version the BIOS is buggy and misses to reset the PHY page selector. This results in the PHY ID read accessing registers on a different page, returning a more or less random value. Fix this by resetting the page selector first. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/64f2055e-98b8-45ec-8568-665e3d54d4e6@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 06663c11ca96..85ed9879af6c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5032,6 +5032,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) struct mii_bus *new_bus; int ret; + /* On some boards with this chip version the BIOS is buggy and misses + * to reset the PHY page selector. This results in the PHY ID read + * accessing registers on a different page, returning a more or + * less random value. Fix this by resetting the page selector first. + */ + if (tp->mac_version == RTL_GIGA_MAC_VER_25 || + tp->mac_version == RTL_GIGA_MAC_VER_26) + r8169_mdio_write(tp, 0x1f, 0); + new_bus = devm_mdiobus_alloc(&pdev->dev); if (!new_bus) return -ENOMEM; From 5d920886c38209a6b321180b55f722f8d6aa1d32 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Apr 2024 17:16:14 -0700 Subject: [PATCH 028/553] x86/cpufeatures: Add CPUID_LNX_5 to track recently added Linux-defined word commit 8cb4a9a82b21623dbb4b3051dd30d98356cf95bc upstream. Add CPUID_LNX_5 to track cpufeatures' word 21, and add the appropriate compile-time assert in KVM to prevent direct lookups on the features in CPUID_LNX_5. KVM uses X86_FEATURE_* flags to manage guest CPUID, and so must translate features that are scattered by Linux from the Linux-defined bit to the hardware-defined bit, i.e. should never try to directly access scattered features in guest CPUID. Opportunistically add NR_CPUID_WORDS to enum cpuid_leafs, along with a compile-time assert in KVM's CPUID infrastructure to ensure that future additions update cpuid_leafs along with NCAPINTS. No functional change intended. Fixes: 7f274e609f3d ("x86/cpufeatures: Add new word for scattered features") Cc: Sandipan Das Signed-off-by: Sean Christopherson Acked-by: Dave Hansen Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/kvm/reverse_cpuid.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 578c3020be7b..16051c6f3b13 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 7c8e2b20a13b..d2f6703a2633 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -83,10 +83,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } From 9bf4acc802966f07243777c3018400cc26cccbe4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 14 Mar 2024 09:44:12 +0100 Subject: [PATCH 029/553] Revert "Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT" commit 4790a73ace86f3d165bbedba898e0758e6e1b82d upstream. This reverts commit 7dcd3e014aa7faeeaf4047190b22d8a19a0db696. Qualcomm Bluetooth controllers like WCN6855 do not have persistent storage for the Bluetooth address and must therefore start as unconfigured to allow the user to set a valid address unless one has been provided by the boot firmware in the devicetree. A recent change snuck into v6.8-rc7 and incorrectly started marking the default (non-unique) address as valid. This specifically also breaks the Bluetooth setup for some user of the Lenovo ThinkPad X13s. Note that this is the second time Qualcomm breaks the driver this way and that this was fixed last year by commit 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk"), which also has some further details. Fixes: 7dcd3e014aa7 ("Bluetooth: hci_qca: Set BDA quirk bit if fwnode exists in DT") Cc: stable@vger.kernel.org # 6.8 Cc: Janaki Ramaiah Thota Signed-off-by: Johan Hovold Reported-by: Clayton Craft Tested-by: Clayton Craft Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 2acda547f4f3..63f7f58de49f 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,7 +7,6 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. - * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1845,17 +1844,7 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - - /* Set BDA quirk bit for reading BDA value from fwnode property - * only if that property exist in DT. - */ - if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); - } else { - bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); - } - + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); From 298dc5dd38d2652cd6b362e3295be2d81f244a19 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:52 +0100 Subject: [PATCH 030/553] arm64: dts: qcom: sc7180-trogdor: mark bluetooth address as broken commit e12e28009e584c8f8363439f6a928ec86278a106 upstream. Several Qualcomm Bluetooth controllers lack persistent storage for the device address and instead one can be provided by the boot firmware using the 'local-bd-address' devicetree property. The Bluetooth bindings clearly states that the address should be specified in little-endian order, but due to a long-standing bug in the Qualcomm driver which reversed the address some boot firmware has been providing the address in big-endian order instead. The boot firmware in SC7180 Trogdor Chromebooks is known to be affected so mark the 'local-bd-address' property as broken to maintain backwards compatibility with older firmware when fixing the underlying driver bug. Note that ChromeOS always updates the kernel and devicetree in lockstep so that there is no need to handle backwards compatibility with older devicetrees. Fixes: 7ec3e67307f8 ("arm64: dts: qcom: sc7180-trogdor: add initial trogdor and lazor dt") Cc: stable@vger.kernel.org # 5.10 Cc: Rob Clark Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Acked-by: Bjorn Andersson Reviewed-by: Bjorn Andersson Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index eae22e6e97c1..f55ce6f2fdc2 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -923,6 +923,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; From a2812ff7ea40b53480835cb22dbcf2b40f19954a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:54 +0100 Subject: [PATCH 031/553] Bluetooth: qca: fix device-address endianness commit 77f45cca8bc55d00520a192f5a7715133591c83e upstream. The WCN6855 firmware on the Lenovo ThinkPad X13s expects the Bluetooth device address in big-endian order when setting it using the EDL_WRITE_BD_ADDR_OPCODE command. Presumably, this is the case for all non-ROME devices which all use the EDL_WRITE_BD_ADDR_OPCODE command for this (unlike the ROME devices which use a different command and expect the address in little-endian order). Reverse the little-endian address before setting it to make sure that the address can be configured using tools like btmgmt or using the 'local-bd-address' devicetree property. Note that this can potentially break systems with boot firmware which has started relying on the broken behaviour and is incorrectly passing the address via devicetree in big-endian order. The only device affected by this should be the WCN3991 used in some Chromebooks. As ChromeOS updates the kernel and devicetree in lockstep, the new 'qcom,local-bd-address-broken' property can be used to determine if the firmware is buggy so that the underlying driver bug can be fixed without breaking backwards compatibility. Set the HCI_QUIRK_BDADDR_PROPERTY_BROKEN quirk for such platforms so that the address is reversed when parsing the address property. Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address") Cc: stable@vger.kernel.org # 5.1 Cc: Balakrishna Godavarthi Cc: Matthias Kaehlcke Tested-by: Nikita Travkin # sc7180 Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btqca.c | 8 ++++++-- drivers/bluetooth/hci_qca.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 0211f704a358..5277090c6d6d 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -758,11 +758,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) { + bdaddr_t bdaddr_swapped; struct sk_buff *skb; int err; - skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, - HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + baswap(&bdaddr_swapped, bdaddr); + + skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, + &bdaddr_swapped, HCI_EV_VENDOR, + HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 63f7f58de49f..33956ddec933 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -225,6 +225,7 @@ struct qca_serdev { struct qca_power *bt_power; u32 init_speed; u32 oper_speed; + bool bdaddr_property_broken; const char *firmware_name; }; @@ -1787,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + struct qca_serdev *qcadev; const char *soc_name; ret = qca_check_speeds(hu); @@ -1845,6 +1847,11 @@ retry: case QCA_WCN6855: case QCA_WCN7850: set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + qcadev = serdev_device_get_drvdata(hu->serdev); + if (qcadev->bdaddr_property_broken) + set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); @@ -2212,6 +2219,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); + qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, + "qcom,local-bd-address-broken"); + if (data) qcadev->btsoc_type = data->soc_type; else From 3e773d04aef9c9e0cdf97cfd23bc82c52305e0a7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:53 +0100 Subject: [PATCH 032/553] Bluetooth: add quirk for broken address properties commit 39646f29b100566451d37abc4cc8cdd583756dfe upstream. Some Bluetooth controllers lack persistent storage for the device address and instead one can be provided by the boot firmware using the 'local-bd-address' devicetree property. The Bluetooth devicetree bindings clearly states that the address should be specified in little-endian order, but due to a long-standing bug in the Qualcomm driver which reversed the address some boot firmware has been providing the address in big-endian order instead. Add a new quirk that can be set on platforms with broken firmware and use it to reverse the address when parsing the property so that the underlying driver bug can be fixed. Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address") Cc: stable@vger.kernel.org # 5.1 Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci.h | 9 +++++++++ net/bluetooth/hci_sync.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c69e09909449..09bc4bf805c6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -175,6 +175,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 7e64cf880f9f..e24b211b10ff 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3293,7 +3293,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - bacpy(&hdev->public_addr, &ba); + if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + baswap(&hdev->public_addr, &ba); + else + bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { From 38e3eaa861bde62b92e207b07a9dc595a784191f Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 27 Mar 2024 12:30:30 +0800 Subject: [PATCH 033/553] Bluetooth: hci_event: set the conn encrypted before conn establishes commit c569242cd49287d53b73a94233db40097d838535 upstream. We have a BT headset (Lenovo Thinkplus XT99), the pairing and connecting has no problem, once this headset is paired, bluez will remember this device and will auto re-connect it whenever the device is powered on. The auto re-connecting works well with Windows and Android, but with Linux, it always fails. Through debugging, we found at the rfcomm connection stage, the bluetooth stack reports "Connection refused - security block (0x0003)". For this device, the re-connecting negotiation process is different from other BT headsets, it sends the Link_KEY_REQUEST command before the CONNECT_REQUEST completes, and it doesn't send ENCRYPT_CHANGE command during the negotiation. When the device sends the "connect complete" to hci, the ev->encr_mode is 1. So here in the conn_complete_evt(), if ev->encr_mode is 1, link type is ACL and HCI_CONN_ENCRYPT is not set, we set HCI_CONN_ENCRYPT to this conn, and update conn->enc_key_size accordingly. After this change, this BT headset could re-connect with Linux successfully. This is the btmon log after applying the patch, after receiving the "Connect Complete" with "Encryption: Enabled", will send the command to read encryption key size: > HCI Event: Connect Request (0x04) plen 10 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) Class: 0x240404 Major class: Audio/Video (headset, speaker, stereo, video, vcr) Minor class: Wearable Headset Device Rendering (Printing, Speaker) Audio (Speaker, Microphone, Headset) Link type: ACL (0x01) ... > HCI Event: Link Key Request (0x17) plen 6 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) < HCI Command: Link Key Request Reply (0x01|0x000b) plen 22 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) Link key: ${32-hex-digits-key} ... > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 256 Address: 8C:3C:AA:D8:11:67 (OUI 8C-3C-AA) Link type: ACL (0x01) Encryption: Enabled (0x01) < HCI Command: Read Encryption Key... (0x05|0x0008) plen 2 Handle: 256 < ACL Data TX: Handle 256 flags 0x00 dlen 10 L2CAP: Information Request (0x0a) ident 1 len 2 Type: Extended features supported (0x0002) > HCI Event: Command Complete (0x0e) plen 7 Read Encryption Key Size (0x05|0x0008) ncmd 1 Status: Success (0x00) Handle: 256 Key size: 16 Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/704 Reviewed-by: Paul Menzel Reviewed-by: Luiz Augusto von Dentz Signed-off-by: Hui Wang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b150dee88f35..bc14223f6693 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3234,6 +3234,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (test_bit(HCI_ENCRYPT, &hdev->flags)) set_bit(HCI_CONN_ENCRYPT, &conn->flags); + /* "Link key request" completed ahead of "connect request" completes */ + if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && + ev->link_type == ACL_LINK) { + struct link_key *key; + struct hci_cp_read_enc_key_size cp; + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (key) { + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + + if (!(hdev->commands[20] & 0x10)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } else { + cp.handle = cpu_to_le16(conn->handle); + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { + bt_dev_err(hdev, "sending read key size failed"); + conn->enc_key_size = HCI_LINK_KEY_SIZE; + } + } + + hci_encrypt_cfm(conn, ev->status); + } + } + /* Get remote features */ if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; From 18e189442a5896255e764f8e875c13d16248ef2f Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 27 Mar 2024 15:24:56 +0100 Subject: [PATCH 034/553] Bluetooth: Fix TOCTOU in HCI debugfs implementation commit 7835fcfd132eb88b87e8eb901f88436f63ab60f7 upstream. struct hci_dev members conn_info_max_age, conn_info_min_age, le_conn_max_interval, le_conn_min_interval, le_adv_max_interval, and le_adv_min_interval can be modified from the HCI core code, as well through debugfs. The debugfs implementation, that's only available to privileged users, will check for boundaries, making sure that the minimum value being set is strictly above the maximum value that already exists, and vice-versa. However, as both minimum and maximum values can be changed concurrently to us modifying them, we need to make sure that the value we check is the value we end up using. For example, with ->conn_info_max_age set to 10, conn_info_min_age_set() gets called from vfs handlers to set conn_info_min_age to 8. In conn_info_min_age_set(), this goes through: if (val == 0 || val > hdev->conn_info_max_age) return -EINVAL; Concurrently, conn_info_max_age_set() gets called to set to set the conn_info_max_age to 7: if (val == 0 || val > hdev->conn_info_max_age) return -EINVAL; That check will also pass because we used the old value (10) for conn_info_max_age. After those checks that both passed, the struct hci_dev access is mutex-locked, disabling concurrent access, but that does not matter because the invalid value checks both passed, and we'll end up with conn_info_min_age = 8 and conn_info_max_age = 7 To fix this problem, we need to lock the structure access before so the check and assignment are not interrupted. This fix was originally devised by the BassCheck[1] team, and considered the problem to be an atomicity one. This isn't the case as there aren't any concerns about the variable changing while we check it, but rather after we check it parallel to another change. This patch fixes CVE-2024-24858 and CVE-2024-24857. [1] https://sites.google.com/view/basscheck/ Co-developed-by: Gui-Dong Han <2045gemini@gmail.com> Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Link: https://lore.kernel.org/linux-bluetooth/20231222161317.6255-1-2045gemini@gmail.com/ Link: https://nvd.nist.gov/vuln/detail/CVE-2024-24858 Link: https://lore.kernel.org/linux-bluetooth/20231222162931.6553-1-2045gemini@gmail.com/ Link: https://lore.kernel.org/linux-bluetooth/20231222162310.6461-1-2045gemini@gmail.com/ Link: https://nvd.nist.gov/vuln/detail/CVE-2024-24857 Fixes: 31ad169148df ("Bluetooth: Add conn info lifetime parameters to debugfs") Fixes: 729a1051da6f ("Bluetooth: Expose default LE advertising interval via debugfs") Fixes: 71c3b60ec6d2 ("Bluetooth: Move BR/EDR debugfs file creation into hci_debugfs.c") Signed-off-by: Bastien Nocera Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_debugfs.c | 64 +++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 6124b3425f35..c9400a7d93d7 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -217,10 +217,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val > hdev->conn_info_max_age) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->conn_info_min_age = val; hci_dev_unlock(hdev); @@ -245,10 +247,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -566,10 +570,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -594,10 +600,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -849,10 +857,12 @@ static int conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -877,10 +887,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -989,10 +1001,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -1017,10 +1031,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) - return -EINVAL; - hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); + return -EINVAL; + } + hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); From 7c1250796b6c262b505a46192f4716b8c6a6a8c6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 27 Mar 2024 13:14:56 +0100 Subject: [PATCH 035/553] xen-netfront: Add missing skb_mark_for_recycle commit 037965402a010898d34f4e35327d22c0a95cd51f upstream. Notice that skb_mark_for_recycle() is introduced later than fixes tag in commit 6a5bcd84e886 ("page_pool: Allow drivers to hint on SKB recycling"). It is believed that fixes tag were missing a call to page_pool_release_page() between v5.9 to v5.14, after which is should have used skb_mark_for_recycle(). Since v6.6 the call page_pool_release_page() were removed (in commit 535b9c61bdef ("net: page_pool: hide page_pool_release_page()") and remaining callers converted (in commit 6bfef2ec0172 ("Merge branch 'net-page_pool-remove-page_pool_release_page'")). This leak became visible in v6.8 via commit dba1b8a7ab68 ("mm/page_pool: catch page_pool memory leaks"). Cc: stable@vger.kernel.org Fixes: 6c5aa6fc4def ("xen networking: add basic XDP support for xen-netfront") Reported-by: Leonidas Spyropoulos Link: https://bugzilla.kernel.org/show_bug.cgi?id=218654 Reported-by: Arthur Borsboom Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/171154167446.2671062.9127105384591237363.stgit@firesoul Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index dc404e05970c..95b5ab4b964e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) return NULL; } skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); + skb_mark_for_recycle(skb); /* Align ip header to a 16 bytes boundary */ skb_reserve(skb, NET_IP_ALIGN); From cbaac2e5488ed54833897264a5ffb2a341a9f196 Mon Sep 17 00:00:00 2001 From: Mahmoud Adam Date: Tue, 26 Mar 2024 16:31:33 +0100 Subject: [PATCH 036/553] net/rds: fix possible cp null dereference commit 62fc3357e079a07a22465b9b6ef71bb6ea75ee4b upstream. cp might be null, calling cp->cp_conn would produce null dereference [Simon Horman adds:] Analysis: * cp is a parameter of __rds_rdma_map and is not reassigned. * The following call-sites pass a NULL cp argument to __rds_rdma_map() - rds_get_mr() - rds_get_mr_for_dest * Prior to the code above, the following assumes that cp may be NULL (which is indicative, but could itself be unnecessary) trans_private = rs->rs_transport->get_mr( sg, nents, rs, &mr->r_key, cp ? cp->cp_conn : NULL, args->vec.addr, args->vec.bytes, need_odp ? ODP_ZEROBASED : ODP_NOT_NEEDED); * The code modified by this patch is guarded by IS_ERR(trans_private), where trans_private is assigned as per the previous point in this analysis. The only implementation of get_mr that I could locate is rds_ib_get_mr() which can return an ERR_PTR if the conn (4th) argument is NULL. * ret is set to PTR_ERR(trans_private). rds_ib_get_mr can return ERR_PTR(-ENODEV) if the conn (4th) argument is NULL. Thus ret may be -ENODEV in which case the code in question will execute. Conclusion: * cp may be NULL at the point where this patch adds a check; this patch does seem to address a possible bug Fixes: c055fc00c07b ("net/rds: fix WARNING in rds_conn_connect_if_down") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mahmoud Adam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240326153132.55580-1-mngyadam@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a4e3c5de998b..00dbcd4d28e6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, } ret = PTR_ERR(trans_private); /* Trigger connection so that its ready for the next retry */ - if (ret == -ENODEV) + if (ret == -ENODEV && cp) rds_conn_connect_if_down(cp->cp_conn); goto out; } From fc77240f6316d17fc58a8881927c3732b1d75d51 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Wed, 3 Apr 2024 15:21:58 +0200 Subject: [PATCH 037/553] net: usb: ax88179_178a: avoid the interface always configured as random address commit 2e91bb99b9d4f756e92e83c4453f894dda220f09 upstream. After the commit d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets"), reset is not executed from bind operation and mac address is not read from the device registers or the devicetree at that moment. Since the check to configure if the assigned mac address is random or not for the interface, happens after the bind operation from usbnet_probe, the interface keeps configured as random address, although the address is correctly read and set during open operation (the only reset now). In order to keep only one reset for the device and to avoid the interface always configured as random address, after reset, configure correctly the suitable field from the driver, if the mac address is read successfully from the device registers or the devicetree. Take into account if a locally administered address (random) was previously stored. cc: stable@vger.kernel.org # 6.6+ Fixes: d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets") Reported-by: Dave Stevenson Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240403132158.344838-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index d837c1887416..e0e9b4c53cb0 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) if (is_valid_ether_addr(mac)) { eth_hw_addr_set(dev->net, mac); + if (!is_local_ether_addr(mac)) + dev->net->addr_assign_type = NET_ADDR_PERM; } else { netdev_info(dev->net, "invalid MAC address, using random\n"); eth_hw_addr_random(dev->net); From db388b8e12aa7c3660617cc5e8beb90f71bba206 Mon Sep 17 00:00:00 2001 From: Marco Pinna Date: Fri, 29 Mar 2024 17:12:59 +0100 Subject: [PATCH 038/553] vsock/virtio: fix packet delivery to tap device commit b32a09ea7c38849ff925489a6bf5bd8914bc45df upstream. Commit 82dfb540aeb2 ("VSOCK: Add virtio vsock vsockmon hooks") added virtio_transport_deliver_tap_pkt() for handing packets to the vsockmon device. However, in virtio_transport_send_pkt_work(), the function is called before actually sending the packet (i.e. before placing it in the virtqueue with virtqueue_add_sgs() and checking whether it returned successfully). Queuing the packet in the virtqueue can fail even multiple times. However, in virtio_transport_deliver_tap_pkt() we deliver the packet to the monitoring tap interface only the first time we call it. This certainly avoids seeing the same packet replicated multiple times in the monitoring interface, but it can show the packet sent with the wrong timestamp or even before we succeed to queue it in the virtqueue. Move virtio_transport_deliver_tap_pkt() after calling virtqueue_add_sgs() and making sure it returned successfully. Fixes: 82dfb540aeb2 ("VSOCK: Add virtio vsock vsockmon hooks") Cc: stable@vge.kernel.org Signed-off-by: Marco Pinna Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20240329161259.411751-1-marco.pinn95@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 16575ea83659..5434c9f11d28 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -109,7 +109,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) if (!skb) break; - virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); @@ -128,6 +127,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) break; } + virtio_transport_deliver_tap_pkt(skb); + if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; From 1c9e71ca615debed700f665ba434e22dbbfd9cd6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Mar 2024 11:47:51 +0100 Subject: [PATCH 039/553] Revert "x86/mm/ident_map: Use gbpages only where full GB page should be mapped." commit c567f2948f57bdc03ed03403ae0234085f376b7d upstream. This reverts commit d794734c9bbfe22f86686dc2909c25f5ffe1a572. While the original change tries to fix a bug, it also unintentionally broke existing systems, see the regressions reported at: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjoseph.com/ Since d794734c9bbf was also marked for -stable, let's back it out before causing more damage. Note that due to another upstream change the revert was not 100% automatic: 0a845e0f6348 mm/treewide: replace pud_large() with pud_leaf() Signed-off-by: Ingo Molnar Cc: Cc: Russ Anderson Cc: Steve Wahl Cc: Dave Hansen Link: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjoseph.com/ Fixes: d794734c9bbf ("x86/mm/ident_map: Use gbpages only where full GB page should be mapped.") Signed-off-by: Steve Wahl Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/ident_map.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index f50cc210a981..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_large(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; From 745cf6a843896cdac8766c74379300ed73c78830 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 1 Apr 2024 00:33:02 +0200 Subject: [PATCH 040/553] netfilter: nf_tables: reject new basechain after table flag update commit 994209ddf4f430946f6247616b2e33d179243769 upstream. When dormant flag is toggled, hooks are disabled in the commit phase by iterating over current chains in table (existing and new). The following configuration allows for an inconsistent state: add table x add chain x y { type filter hook input priority 0; } add table x { flags dormant; } add chain x w { type filter hook input priority 1; } which triggers the following warning when trying to unregister chain w which is already unregistered. [ 127.322252] WARNING: CPU: 7 PID: 1211 at net/netfilter/core.c:50 1 __nf_unregister_net_hook+0x21a/0x260 [...] [ 127.322519] Call Trace: [ 127.322521] [ 127.322524] ? __warn+0x9f/0x1a0 [ 127.322531] ? __nf_unregister_net_hook+0x21a/0x260 [ 127.322537] ? report_bug+0x1b1/0x1e0 [ 127.322545] ? handle_bug+0x3c/0x70 [ 127.322552] ? exc_invalid_op+0x17/0x40 [ 127.322556] ? asm_exc_invalid_op+0x1a/0x20 [ 127.322563] ? kasan_save_free_info+0x3b/0x60 [ 127.322570] ? __nf_unregister_net_hook+0x6a/0x260 [ 127.322577] ? __nf_unregister_net_hook+0x21a/0x260 [ 127.322583] ? __nf_unregister_net_hook+0x6a/0x260 [ 127.322590] ? __nf_tables_unregister_hook+0x8a/0xe0 [nf_tables] [ 127.322655] nft_table_disable+0x75/0xf0 [nf_tables] [ 127.322717] nf_tables_commit+0x2571/0x2620 [nf_tables] Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2a5d9075a081..a7f84fe96de8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2372,6 +2372,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; + if (table->flags & __NFT_TABLE_F_UPDATE) + return -EINVAL; + if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; From 4e8447a9a3d367b5065a0b7abe101da6e0037b6e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Apr 2024 18:04:36 +0200 Subject: [PATCH 041/553] netfilter: nf_tables: flush pending destroy work before exit_net release commit 24cea9677025e0de419989ecb692acd4bb34cac2 upstream. Similar to 2c9f0293280e ("netfilter: nf_tables: flush pending destroy work before netlink notifier") to address a race between exit_net and the destroy workqueue. The trace below shows an element to be released via destroy workqueue while exit_net path (triggered via module removal) has already released the set that is used in such transaction. [ 1360.547789] BUG: KASAN: slab-use-after-free in nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.547861] Read of size 8 at addr ffff888140500cc0 by task kworker/4:1/152465 [ 1360.547870] CPU: 4 PID: 152465 Comm: kworker/4:1 Not tainted 6.8.0+ #359 [ 1360.547882] Workqueue: events nf_tables_trans_destroy_work [nf_tables] [ 1360.547984] Call Trace: [ 1360.547991] [ 1360.547998] dump_stack_lvl+0x53/0x70 [ 1360.548014] print_report+0xc4/0x610 [ 1360.548026] ? __virt_addr_valid+0xba/0x160 [ 1360.548040] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 1360.548054] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548176] kasan_report+0xae/0xe0 [ 1360.548189] ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548312] nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables] [ 1360.548447] ? __pfx_nf_tables_trans_destroy_work+0x10/0x10 [nf_tables] [ 1360.548577] ? _raw_spin_unlock_irq+0x18/0x30 [ 1360.548591] process_one_work+0x2f1/0x670 [ 1360.548610] worker_thread+0x4d3/0x760 [ 1360.548627] ? __pfx_worker_thread+0x10/0x10 [ 1360.548640] kthread+0x16b/0x1b0 [ 1360.548653] ? __pfx_kthread+0x10/0x10 [ 1360.548665] ret_from_fork+0x2f/0x50 [ 1360.548679] ? __pfx_kthread+0x10/0x10 [ 1360.548690] ret_from_fork_asm+0x1a/0x30 [ 1360.548707] [ 1360.548719] Allocated by task 192061: [ 1360.548726] kasan_save_stack+0x20/0x40 [ 1360.548739] kasan_save_track+0x14/0x30 [ 1360.548750] __kasan_kmalloc+0x8f/0xa0 [ 1360.548760] __kmalloc_node+0x1f1/0x450 [ 1360.548771] nf_tables_newset+0x10c7/0x1b50 [nf_tables] [ 1360.548883] nfnetlink_rcv_batch+0xbc4/0xdc0 [nfnetlink] [ 1360.548909] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] [ 1360.548927] netlink_unicast+0x367/0x4f0 [ 1360.548935] netlink_sendmsg+0x34b/0x610 [ 1360.548944] ____sys_sendmsg+0x4d4/0x510 [ 1360.548953] ___sys_sendmsg+0xc9/0x120 [ 1360.548961] __sys_sendmsg+0xbe/0x140 [ 1360.548971] do_syscall_64+0x55/0x120 [ 1360.548982] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 1360.548994] Freed by task 192222: [ 1360.548999] kasan_save_stack+0x20/0x40 [ 1360.549009] kasan_save_track+0x14/0x30 [ 1360.549019] kasan_save_free_info+0x3b/0x60 [ 1360.549028] poison_slab_object+0x100/0x180 [ 1360.549036] __kasan_slab_free+0x14/0x30 [ 1360.549042] kfree+0xb6/0x260 [ 1360.549049] __nft_release_table+0x473/0x6a0 [nf_tables] [ 1360.549131] nf_tables_exit_net+0x170/0x240 [nf_tables] [ 1360.549221] ops_exit_list+0x50/0xa0 [ 1360.549229] free_exit_list+0x101/0x140 [ 1360.549236] unregister_pernet_operations+0x107/0x160 [ 1360.549245] unregister_pernet_subsys+0x1c/0x30 [ 1360.549254] nf_tables_module_exit+0x43/0x80 [nf_tables] [ 1360.549345] __do_sys_delete_module+0x253/0x370 [ 1360.549352] do_syscall_64+0x55/0x120 [ 1360.549360] entry_SYSCALL_64_after_hwframe+0x55/0x5d (gdb) list *__nft_release_table+0x473 0x1e033 is in __nft_release_table (net/netfilter/nf_tables_api.c:11354). 11349 list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { 11350 list_del(&flowtable->list); 11351 nft_use_dec(&table->use); 11352 nf_tables_flowtable_destroy(flowtable); 11353 } 11354 list_for_each_entry_safe(set, ns, &table->sets, list) { 11355 list_del(&set->list); 11356 nft_use_dec(&table->use); 11357 if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) 11358 nft_map_deactivate(&ctx, set); (gdb) [ 1360.549372] Last potentially related work creation: [ 1360.549376] kasan_save_stack+0x20/0x40 [ 1360.549384] __kasan_record_aux_stack+0x9b/0xb0 [ 1360.549392] __queue_work+0x3fb/0x780 [ 1360.549399] queue_work_on+0x4f/0x60 [ 1360.549407] nft_rhash_remove+0x33b/0x340 [nf_tables] [ 1360.549516] nf_tables_commit+0x1c6a/0x2620 [nf_tables] [ 1360.549625] nfnetlink_rcv_batch+0x728/0xdc0 [nfnetlink] [ 1360.549647] nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink] [ 1360.549671] netlink_unicast+0x367/0x4f0 [ 1360.549680] netlink_sendmsg+0x34b/0x610 [ 1360.549690] ____sys_sendmsg+0x4d4/0x510 [ 1360.549697] ___sys_sendmsg+0xc9/0x120 [ 1360.549706] __sys_sendmsg+0xbe/0x140 [ 1360.549715] do_syscall_64+0x55/0x120 [ 1360.549725] entry_SYSCALL_64_after_hwframe+0x55/0x5d Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a7f84fe96de8..6483186a48f6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10981,6 +10981,7 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); + nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); cancel_work_sync(&trans_destroy_work); From 9b5b7708ec2be21dd7ef8ca0e3abe4ae9f3b083b Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 3 Apr 2024 15:22:04 +0800 Subject: [PATCH 042/553] netfilter: nf_tables: Fix potential data-race in __nft_flowtable_type_get() commit 24225011d81b471acc0e1e315b7d9905459a6304 upstream. nft_unregister_flowtable_type() within nf_flow_inet_module_exit() can concurrent with __nft_flowtable_type_get() within nf_tables_newflowtable(). And thhere is not any protection when iterate over nf_tables_flowtables list in __nft_flowtable_type_get(). Therefore, there is pertential data-race of nf_tables_flowtables list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_flowtables list in __nft_flowtable_type_get(), and use rcu_read_lock() in the caller nft_flowtable_type_get() to protect the entire type query process. Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6483186a48f6..8d38cd504769 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7841,11 +7841,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, return err; } +/* call under rcu_read_lock */ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) { const struct nf_flowtable_type *type; - list_for_each_entry(type, &nf_tables_flowtables, list) { + list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { if (family == type->family) return type; } @@ -7857,9 +7858,13 @@ nft_flowtable_type_get(struct net *net, u8 family) { const struct nf_flowtable_type *type; + rcu_read_lock(); type = __nft_flowtable_type_get(family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES From 18aae2cb87e5faa9c5bd865260ceadac60d5a6c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Apr 2024 12:20:51 +0000 Subject: [PATCH 043/553] netfilter: validate user input for expected length commit 0c83842df40f86e529db6842231154772c20edcc upstream. I got multiple syzbot reports showing old bugs exposed by BPF after commit 20f2505fb436 ("bpf: Try to avoid kzalloc in cgroup/{s,g}etsockopt") setsockopt() @optlen argument should be taken into account before copying data. BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] BUG: KASAN: slab-out-of-bounds in do_replace net/ipv4/netfilter/ip_tables.c:1111 [inline] BUG: KASAN: slab-out-of-bounds in do_ipt_set_ctl+0x902/0x3dd0 net/ipv4/netfilter/ip_tables.c:1627 Read of size 96 at addr ffff88802cd73da0 by task syz-executor.4/7238 CPU: 1 PID: 7238 Comm: syz-executor.4 Not tainted 6.9.0-rc2-next-20240403-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 __asan_memcpy+0x29/0x70 mm/kasan/shadow.c:105 copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] copy_from_sockptr include/linux/sockptr.h:55 [inline] do_replace net/ipv4/netfilter/ip_tables.c:1111 [inline] do_ipt_set_ctl+0x902/0x3dd0 net/ipv4/netfilter/ip_tables.c:1627 nf_setsockopt+0x295/0x2c0 net/netfilter/nf_sockopt.c:101 do_sock_setsockopt+0x3af/0x720 net/socket.c:2311 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x72/0x7a RIP: 0033:0x7fd22067dde9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fd21f9ff0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007fd2207abf80 RCX: 00007fd22067dde9 RDX: 0000000000000040 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007fd2206ca47a R08: 0000000000000001 R09: 0000000000000000 R10: 0000000020000880 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fd2207abf80 R15: 00007ffd2d0170d8 Allocated by task 7238: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:370 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:4069 [inline] __kmalloc_noprof+0x200/0x410 mm/slub.c:4082 kmalloc_noprof include/linux/slab.h:664 [inline] __cgroup_bpf_run_filter_setsockopt+0xd47/0x1050 kernel/bpf/cgroup.c:1869 do_sock_setsockopt+0x6b4/0x720 net/socket.c:2293 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x72/0x7a The buggy address belongs to the object at ffff88802cd73da0 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 0 bytes inside of allocated 1-byte region [ffff88802cd73da0, ffff88802cd73da1) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88802cd73020 pfn:0x2cd73 flags: 0xfff80000000000(node=0|zone=1|lastcpupid=0xfff) page_type: 0xffffefff(slab) raw: 00fff80000000000 ffff888015041280 dead000000000100 dead000000000122 raw: ffff88802cd73020 000000008080007f 00000001ffffefff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 5103, tgid 2119833701 (syz-executor.4), ts 5103, free_ts 70804600828 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1490 prep_new_page mm/page_alloc.c:1498 [inline] get_page_from_freelist+0x2e7e/0x2f40 mm/page_alloc.c:3454 __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4712 __alloc_pages_node_noprof include/linux/gfp.h:244 [inline] alloc_pages_node_noprof include/linux/gfp.h:271 [inline] alloc_slab_page+0x5f/0x120 mm/slub.c:2249 allocate_slab+0x5a/0x2e0 mm/slub.c:2412 new_slab mm/slub.c:2465 [inline] ___slab_alloc+0xcd1/0x14b0 mm/slub.c:3615 __slab_alloc+0x58/0xa0 mm/slub.c:3705 __slab_alloc_node mm/slub.c:3758 [inline] slab_alloc_node mm/slub.c:3936 [inline] __do_kmalloc_node mm/slub.c:4068 [inline] kmalloc_node_track_caller_noprof+0x286/0x450 mm/slub.c:4089 kstrdup+0x3a/0x80 mm/util.c:62 device_rename+0xb5/0x1b0 drivers/base/core.c:4558 dev_change_name+0x275/0x860 net/core/dev.c:1232 do_setlink+0xa4b/0x41f0 net/core/rtnetlink.c:2864 __rtnl_newlink net/core/rtnetlink.c:3680 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3727 rtnetlink_rcv_msg+0x89b/0x10d0 net/core/rtnetlink.c:6594 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2559 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 page last free pid 5146 tgid 5146 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1110 [inline] free_unref_page+0xd3c/0xec0 mm/page_alloc.c:2617 discard_slab mm/slub.c:2511 [inline] __put_partials+0xeb/0x130 mm/slub.c:2980 put_cpu_partial+0x17c/0x250 mm/slub.c:3055 __slab_free+0x2ea/0x3d0 mm/slub.c:4254 qlink_free mm/kasan/quarantine.c:163 [inline] qlist_free_all+0x9e/0x140 mm/kasan/quarantine.c:179 kasan_quarantine_reduce+0x14f/0x170 mm/kasan/quarantine.c:286 __kasan_slab_alloc+0x23/0x80 mm/kasan/common.c:322 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3888 [inline] slab_alloc_node mm/slub.c:3948 [inline] __do_kmalloc_node mm/slub.c:4068 [inline] __kmalloc_node_noprof+0x1d7/0x450 mm/slub.c:4076 kmalloc_node_noprof include/linux/slab.h:681 [inline] kvmalloc_node_noprof+0x72/0x190 mm/util.c:634 bucket_table_alloc lib/rhashtable.c:186 [inline] rhashtable_rehash_alloc+0x9e/0x290 lib/rhashtable.c:367 rht_deferred_worker+0x4e1/0x2440 lib/rhashtable.c:427 process_one_work kernel/workqueue.c:3218 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3299 worker_thread+0x86d/0xd70 kernel/workqueue.c:3380 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 Memory state around the buggy address: ffff88802cd73c80: 07 fc fc fc 05 fc fc fc 05 fc fc fc fa fc fc fc ffff88802cd73d00: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc >ffff88802cd73d80: fa fc fc fc 01 fc fc fc fa fc fc fc fa fc fc fc ^ ffff88802cd73e00: fa fc fc fc fa fc fc fc 05 fc fc fc 07 fc fc fc ffff88802cd73e80: 07 fc fc fc 07 fc fc fc 07 fc fc fc 07 fc fc fc Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20240404122051.2303764-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 6 ++++++ net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 4 ++++ 4 files changed, 18 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aa23479b20b2..ed62c1026fe9 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) struct ebt_table_info *newinfo; struct ebt_replace tmp; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; @@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, { struct compat_ebt_replace hlp; + if (len < sizeof(hlp)) + return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2407066b0fec..b150c9929b12 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct arpt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index da5998011ab9..1f365e28e316 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1110,6 +1110,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1494,6 +1496,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0ce0ed17c758..37a2b3301e42 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1127,6 +1127,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; @@ -1503,6 +1505,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; + if (len < sizeof(tmp)) + return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; From 8a57544e9285a16dce8e58f470a1b30f426812c4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 1 Nov 2023 11:49:48 +0100 Subject: [PATCH 044/553] vboxsf: Avoid an spurious warning if load_nls_xxx() fails commit de3f64b738af57e2732b91a0774facc675b75b54 upstream. If an load_nls_xxx() function fails a few lines above, the 'sbi->bdi_id' is still 0. So, in the error handling path, we will call ida_simple_remove(..., 0) which is not allocated yet. In order to prevent a spurious "ida_free called for id=0 which is not allocated." message, tweak the error handling path and add a new label. Fixes: 0fd169576648 ("fs: Add VirtualBox guest shared folder (vboxsf) support") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/d09eaaa4e2e08206c58a1a27ca9b3e81dc168773.1698835730.git.christophe.jaillet@wanadoo.fr Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- fs/vboxsf/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index d2f6df69f611..74952e58cca0 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) if (!sbi->nls) { vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); err = -EINVAL; - goto fail_free; + goto fail_destroy_idr; } } @@ -224,6 +224,7 @@ fail_free: ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); if (sbi->nls) unload_nls(sbi->nls); +fail_destroy_idr: idr_destroy(&sbi->ino_idr); kfree(sbi); return err; From a44770fed86515eedb5a7c00b787f847ebb134a5 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 2 Apr 2024 12:46:21 +0200 Subject: [PATCH 045/553] bpf, sockmap: Prevent lock inversion deadlock in map delete elem commit ff91059932401894e6c86341915615c5eb0eca48 upstream. syzkaller started using corpuses where a BPF tracing program deletes elements from a sockmap/sockhash map. Because BPF tracing programs can be invoked from any interrupt context, locks taken during a map_delete_elem operation must be hardirq-safe. Otherwise a deadlock due to lock inversion is possible, as reported by lockdep: CPU0 CPU1 ---- ---- lock(&htab->buckets[i].lock); local_irq_disable(); lock(&host->lock); lock(&htab->buckets[i].lock); lock(&host->lock); Locks in sockmap are hardirq-unsafe by design. We expects elements to be deleted from sockmap/sockhash only in task (normal) context with interrupts enabled, or in softirq context. Detect when map_delete_elem operation is invoked from a context which is _not_ hardirq-unsafe, that is interrupts are disabled, and bail out with an error. Note that map updates are not affected by this issue. BPF verifier does not allow updating sockmap/sockhash from a BPF tracing program today. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+bc922f476bd65abbd466@syzkaller.appspotmail.com Reported-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Tested-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com Acked-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=d4066896495db380182e Closes: https://syzkaller.appspot.com/bug?extid=bc922f476bd65abbd466 Link: https://lore.kernel.org/bpf/20240402104621.1050319-1-jakub@cloudflare.com Signed-off-by: Greg Kroah-Hartman --- net/core/sock_map.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 91140bc0541f..aa7ff6a46429 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -413,6 +413,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock *sk; int err = 0; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + raw_spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) @@ -926,6 +929,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) struct bpf_shtab_elem *elem; int ret = -ENOENT; + if (irqs_disabled()) + return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ + hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); From 55d3fe7b2b7bc354e7cbc1f7b8f98a29ccd5a366 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Apr 2024 13:09:08 +0000 Subject: [PATCH 046/553] net/sched: act_skbmod: prevent kernel-infoleak commit d313eb8b77557a6d5855f42d2234bd592c7b50dd upstream. syzbot found that tcf_skbmod_dump() was copying four bytes from kernel stack to user space [1]. The issue here is that 'struct tc_skbmod' has a four bytes hole. We need to clear the structure before filling fields. [1] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x366/0x2520 lib/iov_iter.c:185 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x366/0x2520 lib/iov_iter.c:185 copy_to_iter include/linux/uio.h:196 [inline] simple_copy_to_iter net/core/datagram.c:532 [inline] __skb_datagram_iter+0x185/0x1000 net/core/datagram.c:420 skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:4050 [inline] netlink_recvmsg+0x432/0x1610 net/netlink/af_netlink.c:1962 sock_recvmsg_nosec net/socket.c:1046 [inline] sock_recvmsg+0x2c4/0x340 net/socket.c:1068 __sys_recvfrom+0x35a/0x5f0 net/socket.c:2242 __do_sys_recvfrom net/socket.c:2260 [inline] __se_sys_recvfrom net/socket.c:2256 [inline] __x64_sys_recvfrom+0x126/0x1d0 net/socket.c:2256 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was stored to memory at: pskb_expand_head+0x30f/0x19d0 net/core/skbuff.c:2253 netlink_trim+0x2c2/0x330 net/netlink/af_netlink.c:1317 netlink_unicast+0x9f/0x1260 net/netlink/af_netlink.c:1351 nlmsg_unicast include/net/netlink.h:1144 [inline] nlmsg_notify+0x21d/0x2f0 net/netlink/af_netlink.c:2610 rtnetlink_send+0x73/0x90 net/core/rtnetlink.c:741 rtnetlink_maybe_send include/linux/rtnetlink.h:17 [inline] tcf_add_notify net/sched/act_api.c:2048 [inline] tcf_action_add net/sched/act_api.c:2071 [inline] tc_ctl_action+0x146e/0x19d0 net/sched/act_api.c:2119 rtnetlink_rcv_msg+0x1737/0x1900 net/core/rtnetlink.c:6595 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2559 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6613 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0xf4c/0x1260 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x10df/0x11f0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 ____sys_sendmsg+0x877/0xb60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x4a0 net/socket.c:2674 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was stored to memory at: __nla_put lib/nlattr.c:1041 [inline] nla_put+0x1c6/0x230 lib/nlattr.c:1099 tcf_skbmod_dump+0x23f/0xc20 net/sched/act_skbmod.c:256 tcf_action_dump_old net/sched/act_api.c:1191 [inline] tcf_action_dump_1+0x85e/0x970 net/sched/act_api.c:1227 tcf_action_dump+0x1fd/0x460 net/sched/act_api.c:1251 tca_get_fill+0x519/0x7a0 net/sched/act_api.c:1628 tcf_add_notify_msg net/sched/act_api.c:2023 [inline] tcf_add_notify net/sched/act_api.c:2042 [inline] tcf_action_add net/sched/act_api.c:2071 [inline] tc_ctl_action+0x1365/0x19d0 net/sched/act_api.c:2119 rtnetlink_rcv_msg+0x1737/0x1900 net/core/rtnetlink.c:6595 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2559 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6613 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0xf4c/0x1260 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x10df/0x11f0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 ____sys_sendmsg+0x877/0xb60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x4a0 net/socket.c:2674 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Local variable opt created at: tcf_skbmod_dump+0x9d/0xc20 net/sched/act_skbmod.c:244 tcf_action_dump_old net/sched/act_api.c:1191 [inline] tcf_action_dump_1+0x85e/0x970 net/sched/act_api.c:1227 Bytes 188-191 of 248 are uninitialized Memory access of size 248 starts at ffff888117697680 Data copied to user address 00007ffe56d855f0 Fixes: 86da71b57383 ("net_sched: Introduce skbmod action") Signed-off-by: Eric Dumazet Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240403130908.93421-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/act_skbmod.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d98758a63934..744ff9729469 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -239,13 +239,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); struct tcf_skbmod_params *p; - struct tc_skbmod opt = { - .index = d->tcf_index, - .refcnt = refcount_read(&d->tcf_refcnt) - ref, - .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - }; + struct tc_skbmod opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + opt.index = d->tcf_index; + opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; p = rcu_dereference_protected(d->skbmod_p, From b7d1ce2cc7192e8a037faa3f5d3ba72c25976460 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Apr 2024 13:41:33 +0000 Subject: [PATCH 047/553] net/sched: fix lockdep splat in qdisc_tree_reduce_backlog() commit 7eb322360b0266481e560d1807ee79e0cef5742b upstream. qdisc_tree_reduce_backlog() is called with the qdisc lock held, not RTNL. We must use qdisc_lookup_rcu() instead of qdisc_lookup() syzbot reported: WARNING: suspicious RCU usage 6.1.74-syzkaller #0 Not tainted ----------------------------- net/sched/sch_api.c:305 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by udevd/1142: #0: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:306 [inline] #0: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:747 [inline] #0: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: net_tx_action+0x64a/0x970 net/core/dev.c:5282 #1: ffff888171861108 (&sch->q.lock){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:350 [inline] #1: ffff888171861108 (&sch->q.lock){+.-.}-{2:2}, at: net_tx_action+0x754/0x970 net/core/dev.c:5297 #2: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:306 [inline] #2: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:747 [inline] #2: ffffffff87c729a0 (rcu_read_lock){....}-{1:2}, at: qdisc_tree_reduce_backlog+0x84/0x580 net/sched/sch_api.c:792 stack backtrace: CPU: 1 PID: 1142 Comm: udevd Not tainted 6.1.74-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: [] __dump_stack lib/dump_stack.c:88 [inline] [] dump_stack_lvl+0x1b1/0x28f lib/dump_stack.c:106 [] dump_stack+0x15/0x1e lib/dump_stack.c:113 [] lockdep_rcu_suspicious+0x1b9/0x260 kernel/locking/lockdep.c:6592 [] qdisc_lookup+0xac/0x6f0 net/sched/sch_api.c:305 [] qdisc_tree_reduce_backlog+0x243/0x580 net/sched/sch_api.c:811 [] pfifo_tail_enqueue+0x32c/0x4b0 net/sched/sch_fifo.c:51 [] qdisc_enqueue include/net/sch_generic.h:833 [inline] [] netem_dequeue+0xeb3/0x15d0 net/sched/sch_netem.c:723 [] dequeue_skb net/sched/sch_generic.c:292 [inline] [] qdisc_restart net/sched/sch_generic.c:397 [inline] [] __qdisc_run+0x249/0x1e60 net/sched/sch_generic.c:415 [] qdisc_run+0xd6/0x260 include/net/pkt_sched.h:125 [] net_tx_action+0x7c9/0x970 net/core/dev.c:5313 [] __do_softirq+0x2bd/0x9bd kernel/softirq.c:616 [] invoke_softirq kernel/softirq.c:447 [inline] [] __irq_exit_rcu+0xca/0x230 kernel/softirq.c:700 [] irq_exit_rcu+0x9/0x20 kernel/softirq.c:712 [] sysvec_apic_timer_interrupt+0x42/0x90 arch/x86/kernel/apic/apic.c:1107 [] asm_sysvec_apic_timer_interrupt+0x1b/0x20 arch/x86/include/asm/idtentry.h:656 Fixes: d636fc5dd692 ("net: sched: add rcu annotations around qdisc->qdisc_sleeping") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240402134133.2352776-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e8f988e1c7e6..334a563e0bc1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -806,7 +806,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) notify = !sch->q.qlen && !WARN_ON_ONCE(!n && !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ - sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); + sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON_ONCE(parentid != TC_H_ROOT); break; From e01835f3a1bd640e0f4b9e3b62fcfa7889c5d85a Mon Sep 17 00:00:00 2001 From: Piotr Wejman Date: Mon, 1 Apr 2024 21:22:39 +0200 Subject: [PATCH 048/553] net: stmmac: fix rx queue priority assignment commit b3da86d432b7cd65b025a11f68613e333d2483db upstream. The driver should ensure that same priority is not mapped to multiple rx queues. From DesignWare Cores Ethernet Quality-of-Service Databook, section 17.1.29 MAC_RxQ_Ctrl2: "[...]The software must ensure that the content of this field is mutually exclusive to the PSRQ fields for other queues, that is, the same priority is not mapped to multiple Rx queues[...]" Previously rx_queue_priority() function was: - clearing all priorities from a queue - adding new priorities to that queue After this patch it will: - first assign new priorities to a queue - then remove those priorities from all other queues - keep other priorities previously assigned to that queue Fixes: a8f5102af2a7 ("net: stmmac: TX and RX queue priority configuration") Fixes: 2142754f8b9c ("net: stmmac: Add MAC related callbacks for XGMAC2") Signed-off-by: Piotr Wejman Link: https://lore.kernel.org/r/20240401192239.33942-1-piotrwejman90@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 40 ++++++++++++++----- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 38 ++++++++++++++---- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 84276eb681d7..39112d5cb5b8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -87,19 +87,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 base_register; - u32 value; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & + GMAC_RXQCTRL_PSRQX_MASK(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); + + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + base_register); - - value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); - value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & GMAC_RXQCTRL_PSRQX_MASK(queue); - writel(value, ioaddr + base_register); + + writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); + } } static void dwmac4_tx_queue_priority(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index ec1616ffbfa7..dd73f38ec08d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -97,17 +97,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, u32 queue) { void __iomem *ioaddr = hw->pcsr; - u32 value, reg; + u32 clear_mask = 0; + u32 ctrl2, ctrl3; + int i; - reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; - if (queue >= 4) + ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); + ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); + + /* The software must ensure that the same priority + * is not mapped to multiple Rx queues + */ + for (i = 0; i < 4; i++) + clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & + XGMAC_PSRQ(i)); + + ctrl2 &= ~clear_mask; + ctrl3 &= ~clear_mask; + + /* First assign new priorities to a queue, then + * clear them from others queues + */ + if (queue < 4) { + ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); + + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + } else { queue -= 4; - value = readl(ioaddr + reg); - value &= ~XGMAC_PSRQ(queue); - value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); + ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & + XGMAC_PSRQ(queue); - writel(value, ioaddr + reg); + writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); + writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); + } } static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, From 77f5e52d7beedd92ca9264eaa0f2f802dde347a3 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 2 Apr 2024 09:16:34 +0200 Subject: [PATCH 049/553] net: phy: micrel: lan8814: Fix when enabling/disabling 1-step timestamping commit de99e1ea3a35f23ff83a31d6b08f43d27b2c6345 upstream. There are 2 issues with the blamed commit. 1. When the phy is initialized, it would enable the disabled of UDPv4 checksums. The UDPv6 checksum is already enabled by default. So when 1-step is configured then it would clear these flags. 2. After the 1-step is configured, then if 2-step is configured then the 1-step would be still configured because it is not clearing the flag. So the sync frames will still have origin timestamps set. Fix this by reading first the value of the register and then just change bit 12 as this one determines if the timestamp needs to be inserted in the frame, without changing any other bits. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Reviewed-by: Divya Koppera Link: https://lore.kernel.org/r/20240402071634.2483524-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9481f172830f..e67bffd6f423 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2188,6 +2188,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) struct hwtstamp_config config; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; + int tx_mod; if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -2237,9 +2238,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); - if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); + if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, - PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, + tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); + } if (config.rx_filter != HWTSTAMP_FILTER_NONE) lan8814_config_ts_intr(ptp_priv->phydev, true); From 10608161696c2768f53426642f78a42bcaaa53e8 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Fri, 29 Mar 2024 09:16:31 +0300 Subject: [PATCH 050/553] net: phy: micrel: Fix potential null pointer dereference commit 96c155943a703f0655c0c4cab540f67055960e91 upstream. In lan8814_get_sig_rx() and lan8814_get_sig_tx() ptp_parse_header() may return NULL as ptp_header due to abnormal packet type or corrupted packet. Fix this bug by adding ptp_header check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Aleksandr Mishin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240329061631.33199-1-amishin@t-argos.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e67bffd6f423..2cbb1d1830bb 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2303,7 +2303,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, } } -static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2313,7 +2313,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ptp_header = ptp_parse_header(skb, type); skb_pull_inline(skb, ETH_HLEN); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static bool lan8814_match_rx_ts(struct kszphy_ptp_priv *ptp_priv, @@ -2325,7 +2329,8 @@ static bool lan8814_match_rx_ts(struct kszphy_ptp_priv *ptp_priv, bool ret = false; u16 skb_sig; - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + return ret; /* Iterate over all RX timestamps and match it with the received skbs */ spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); @@ -2605,7 +2610,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) return 0; } -static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) +static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) { struct ptp_header *ptp_header; u32 type; @@ -2613,7 +2618,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) type = ptp_classify_raw(skb); ptp_header = ptp_parse_header(skb, type); + if (!ptp_header) + return false; + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + return true; } static void lan8814_dequeue_tx_skb(struct kszphy_ptp_priv *ptp_priv) @@ -2631,7 +2640,8 @@ static void lan8814_dequeue_tx_skb(struct kszphy_ptp_priv *ptp_priv) spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { - lan8814_get_sig_tx(skb, &skb_sig); + if (!lan8814_get_sig_tx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) continue; @@ -2675,7 +2685,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { - lan8814_get_sig_rx(skb, &skb_sig); + if (!lan8814_get_sig_rx(skb, &skb_sig)) + continue; if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) continue; From 66cb6659008b7b723ee558ff6df9e5459360b28d Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:34:02 +0100 Subject: [PATCH 051/553] selftests: net: gro fwd: update vxlan GRO test expectations commit 0fb101be97ca27850c5ecdbd1269423ce4d1f607 upstream. UDP tunnel packets can't be GRO in-between their endpoints as this causes different issues. The UDP GRO fwd vxlan tests were relying on this and their expectations have to be fixed. We keep both vxlan tests and expected no GRO from happening. The vxlan UDP GRO bench test was removed as it's not providing any valuable information now. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/udpgro_fwd.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 9690a5d7ffd7..9e9b4644e0ea 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -239,7 +239,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -252,13 +252,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done From 2eeab8c47c3c0276e0746bc382f405c9a236a5ad Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:33:59 +0100 Subject: [PATCH 052/553] gro: fix ownership transfer commit ed4cccef64c1d0d5b91e69f7a8a6697c3a865486 upstream. If packets are GROed with fraglist they might be segmented later on and continue their journey in the stack. In skb_segment_list those skbs can be reused as-is. This is an issue as their destructor was removed in skb_gro_receive_list but not the reference to their socket, and then they can't be orphaned. Fix this by also removing the reference to the socket. For example this could be observed, kernel BUG at include/linux/skbuff.h:3131! (skb_orphan) RIP: 0010:ip6_rcv_core+0x11bc/0x19a0 Call Trace: ipv6_list_rcv+0x250/0x3f0 __netif_receive_skb_list_core+0x49d/0x8f0 netif_receive_skb_list_internal+0x634/0xd40 napi_complete_done+0x1d2/0x7d0 gro_cell_poll+0x118/0x1f0 A similar construction is found in skb_gro_receive, apply the same change there. Fixes: 5e10da5385d2 ("skbuff: allow 'slow_gro' for skb carring sock reference") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gro.c | 3 ++- net/ipv4/udp_offload.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index 352f966cb1da..47118e97ecfd 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -252,8 +252,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) } merge: - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8096576fd9bd..b9e638f76753 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -441,8 +441,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) NAPI_GRO_CB(p)->count++; p->data_len += skb->len; - /* sk owenrship - if any - completely transferred to the aggregated packet */ + /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; + skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; From 23178ec5abbdecfdd67eca17f3494f7c6ca0d925 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 28 Mar 2024 13:59:05 +0100 Subject: [PATCH 053/553] x86/bugs: Fix the SRSO mitigation on Zen3/4 commit 4535e1a4174c4111d92c5a9a21e542d232e0fcaa upstream. The original version of the mitigation would patch in the calls to the untraining routines directly. That is, the alternative() in UNTRAIN_RET will patch in the CALL to srso_alias_untrain_ret() directly. However, even if commit e7c25c441e9e ("x86/cpu: Cleanup the untrain mess") meant well in trying to clean up the situation, due to micro- architectural reasons, the untraining routine srso_alias_untrain_ret() must be the target of a CALL instruction and not of a JMP instruction as it is done now. Reshuffle the alternative macros to accomplish that. Fixes: e7c25c441e9e ("x86/cpu: Cleanup the untrain mess") Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ingo Molnar Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 1 + arch/x86/include/asm/nospec-branch.h | 20 ++++++++++++++------ arch/x86/lib/retpoline.S | 4 +--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8f80de627c60..5cdccea45554 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8f6f17a8617b..47e4e06a47d7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -167,11 +167,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -188,9 +197,8 @@ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + CALL_UNTRAIN_RET + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 65c5c44f006b..24c850e1e239 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -252,9 +252,7 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE "jmp retbleed_untrain_ret", "jmp srso_untrain_ret", X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) From ed37bdaee64b51faaee55d2dfce5b702ace7bab0 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 2 Apr 2024 16:05:49 +0200 Subject: [PATCH 054/553] x86/retpoline: Do the necessary fixup to the Zen3/4 srso return thunk for !SRSO commit 0e110732473e14d6520e49d75d2c88ef7d46fe67 upstream. The srso_alias_untrain_ret() dummy thunk in the !CONFIG_MITIGATION_SRSO case is there only for the altenative in CALL_UNTRAIN_RET to have a symbol to resolve. However, testing with kernels which don't have CONFIG_MITIGATION_SRSO enabled, leads to the warning in patch_return() to fire: missing return thunk: srso_alias_untrain_ret+0x0/0x10-0x0: eb 0e 66 66 2e WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:826 apply_returns (arch/x86/kernel/alternative.c:826 Put in a plain "ret" there so that gcc doesn't put a return thunk in in its place which special and gets checked. In addition: ERROR: modpost: "srso_alias_untrain_ret" [arch/x86/kvm/kvm-amd.ko] undefined! make[2]: *** [scripts/Makefile.modpost:145: Module.symvers] Chyba 1 make[1]: *** [/usr/src/linux-6.8.3/Makefile:1873: modpost] Chyba 2 make: *** [Makefile:240: __sub-make] Chyba 2 since !SRSO builds would use the dummy return thunk as reported by petr.pisar@atlas.cz, https://bugzilla.kernel.org/show_bug.cgi?id=218679. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202404020901.da75a60f-oliver.sang@intel.com Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/all/202404020901.da75a60f-oliver.sang@intel.com/ Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 24c850e1e239..a96e816e5ccd 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -110,6 +110,7 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ret int3 SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #endif SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) From 1829b618ccc42c6740f0b91e9251d7dddc2b4108 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 29 Mar 2024 11:06:37 -0700 Subject: [PATCH 055/553] i40e: Fix VF MAC filter removal commit ea2a1cfc3b2019bdea6324acd3c03606b60d71ad upstream. Commit 73d9629e1c8c ("i40e: Do not allow untrusted VF to remove administratively set MAC") fixed an issue where untrusted VF was allowed to remove its own MAC address although this was assigned administratively from PF. Unfortunately the introduced check is wrong because it causes that MAC filters for other MAC addresses including multi-cast ones are not removed. if (ether_addr_equal(addr, vf->default_lan_addr.addr) && i40e_can_vf_change_mac(vf)) was_unimac_deleted = true; else continue; if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ... The else path with `continue` effectively skips any MAC filter removal except one for primary MAC addr when VF is allowed to do so. Fix the check condition so the `continue` is only done for primary MAC address. Fixes: 73d9629e1c8c ("i40e: Do not allow untrusted VF to remove administratively set MAC") Signed-off-by: Ivan Vecera Reviewed-by: Michal Schmidt Reviewed-by: Brett Creeley Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240329180638.211412-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index ed4be80fec2a..99cab36fb107 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3078,11 +3078,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) /* Allow to delete VF primary MAC only if it was not set * administratively by PF or if VF is trusted. */ - if (ether_addr_equal(addr, vf->default_lan_addr.addr) && - i40e_can_vf_change_mac(vf)) - was_unimac_deleted = true; - else - continue; + if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { + if (i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + } if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; From 1db7fcb2b290c47c202b79528824f119fa28937d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Mar 2024 11:22:48 +0000 Subject: [PATCH 056/553] erspan: make sure erspan_base_hdr is present in skb->head commit 17af420545a750f763025149fa7b833a4fc8b8f0 upstream. syzbot reported a problem in ip6erspan_rcv() [1] Issue is that ip6erspan_rcv() (and erspan_rcv()) no longer make sure erspan_base_hdr is present in skb linear part (skb->head) before getting @ver field from it. Add the missing pskb_may_pull() calls. v2: Reload iph pointer in erspan_rcv() after pskb_may_pull() because skb->head might have changed. [1] BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2742 [inline] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2756 [inline] BUG: KMSAN: uninit-value in ip6erspan_rcv net/ipv6/ip6_gre.c:541 [inline] BUG: KMSAN: uninit-value in gre_rcv+0x11f8/0x1930 net/ipv6/ip6_gre.c:610 pskb_may_pull_reason include/linux/skbuff.h:2742 [inline] pskb_may_pull include/linux/skbuff.h:2756 [inline] ip6erspan_rcv net/ipv6/ip6_gre.c:541 [inline] gre_rcv+0x11f8/0x1930 net/ipv6/ip6_gre.c:610 ip6_protocol_deliver_rcu+0x1d4c/0x2ca0 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:460 [inline] ip6_rcv_finish+0x955/0x970 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:314 [inline] ipv6_rcv+0xde/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5538 [inline] __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5652 netif_receive_skb_internal net/core/dev.c:5738 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5798 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1549 tun_get_user+0x5566/0x69e0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2108 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb63/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xe0 fs/read_write.c:652 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was created at: slab_post_alloc_hook mm/slub.c:3804 [inline] slab_alloc_node mm/slub.c:3845 [inline] kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577 __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1318 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795 tun_alloc_skb drivers/net/tun.c:1525 [inline] tun_get_user+0x209a/0x69e0 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2108 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb63/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xe0 fs/read_write.c:652 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 CPU: 1 PID: 5045 Comm: syz-executor114 Not tainted 6.9.0-rc1-syzkaller-00021-g962490525cff #0 Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup") Reported-by: syzbot+1c1cf138518bf0c53d68@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/000000000000772f2c0614b66ef7@google.com/ Signed-off-by: Eric Dumazet Cc: Lorenzo Bianconi Link: https://lore.kernel.org/r/20240328112248.1101491-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 5 +++++ net/ipv6/ip6_gre.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d67d026d7f97..0267fa05374a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, tpi->flags | TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); } else { + if (unlikely(!pskb_may_pull(skb, + gre_hdr_len + sizeof(*ershdr)))) + return PACKET_REJECT; + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d3fba7d8dec4..b3e2d658af80 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; From 2febb7eeb493dd31ee05dfd307a4f0e61ac183ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Mar 2024 09:05:59 -0700 Subject: [PATCH 057/553] selftests: reuseaddr_conflict: add missing new line at the end of the output commit 31974122cfdeaf56abc18d8ab740d580d9833e90 upstream. The netdev CI runs in a VM and captures serial, so stdout and stderr get combined. Because there's a missing new line in stderr the test ends up corrupting KTAP: # Successok 1 selftests: net: reuseaddr_conflict which should have been: # Success ok 1 selftests: net: reuseaddr_conflict Fixes: 422d8dc6fd3a ("selftest: add a reuseaddr test") Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20240329160559.249476-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/reuseaddr_conflict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b03..bfb07dc49518 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } From 167d4b47a9bdcb01541dfa29e9f3cbb8edd3dfd2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 1 Apr 2024 14:10:04 -0700 Subject: [PATCH 058/553] ipv6: Fix infinite recursion in fib6_dump_done(). commit d21d40605bca7bd5fc23ef03d4c1ca1f48bc2cae upstream. syzkaller reported infinite recursive calls of fib6_dump_done() during netlink socket destruction. [1] From the log, syzkaller sent an AF_UNSPEC RTM_GETROUTE message, and then the response was generated. The following recvmmsg() resumed the dump for IPv6, but the first call of inet6_dump_fib() failed at kzalloc() due to the fault injection. [0] 12:01:34 executing program 3: r0 = socket$nl_route(0x10, 0x3, 0x0) sendmsg$nl_route(r0, ... snip ...) recvmmsg(r0, ... snip ...) (fail_nth: 8) Here, fib6_dump_done() was set to nlk_sk(sk)->cb.done, and the next call of inet6_dump_fib() set it to nlk_sk(sk)->cb.args[3]. syzkaller stopped receiving the response halfway through, and finally netlink_sock_destruct() called nlk_sk(sk)->cb.done(). fib6_dump_done() calls fib6_dump_end() and nlk_sk(sk)->cb.done() if it is still not NULL. fib6_dump_end() rewrites nlk_sk(sk)->cb.done() by nlk_sk(sk)->cb.args[3], but it has the same function, not NULL, calling itself recursively and hitting the stack guard page. To avoid the issue, let's set the destructor after kzalloc(). [0]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 1 PID: 432110 Comm: syz-executor.3 Not tainted 6.8.0-12821-g537c2e91d354-dirty #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:117) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3733) kmalloc_trace (mm/slub.c:3748 mm/slub.c:3827 mm/slub.c:3992) inet6_dump_fib (./include/linux/slab.h:628 ./include/linux/slab.h:749 net/ipv6/ip6_fib.c:662) rtnl_dump_all (net/core/rtnetlink.c:4029) netlink_dump (net/netlink/af_netlink.c:2269) netlink_recvmsg (net/netlink/af_netlink.c:1988) ____sys_recvmsg (net/socket.c:1046 net/socket.c:2801) ___sys_recvmsg (net/socket.c:2846) do_recvmmsg (net/socket.c:2943) __x64_sys_recvmmsg (net/socket.c:3041 net/socket.c:3034 net/socket.c:3034) [1]: BUG: TASK stack guard page was hit at 00000000f2fa9af1 (stack is 00000000b7912430..000000009a436beb) stack guard page: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 223719 Comm: kworker/1:3 Not tainted 6.8.0-12821-g537c2e91d354-dirty #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: events netlink_sock_destruct_work RIP: 0010:fib6_dump_done (net/ipv6/ip6_fib.c:570) Code: 3c 24 e8 f3 e9 51 fd e9 28 fd ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 41 57 41 56 41 55 41 54 55 48 89 fd <53> 48 8d 5d 60 e8 b6 4d 07 fd 48 89 da 48 b8 00 00 00 00 00 fc ff RSP: 0018:ffffc9000d980000 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffffffff84405990 RCX: ffffffff844059d3 RDX: ffff8881028e0000 RSI: ffffffff84405ac2 RDI: ffff88810c02f358 RBP: ffff88810c02f358 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000224 R12: 0000000000000000 R13: ffff888007c82c78 R14: ffff888007c82c68 R15: ffff888007c82c68 FS: 0000000000000000(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc9000d97fff8 CR3: 0000000102309002 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: <#DF> fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) ... fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1)) netlink_sock_destruct (net/netlink/af_netlink.c:401) __sk_destruct (net/core/sock.c:2177 (discriminator 2)) sk_destruct (net/core/sock.c:2224) __sk_free (net/core/sock.c:2235) sk_free (net/core/sock.c:2246) process_one_work (kernel/workqueue.c:3259) worker_thread (kernel/workqueue.c:3329 kernel/workqueue.c:3416) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:256) Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240401211003.25274-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1840735e9cb0..e606374854ce 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -646,19 +646,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (!w) { /* New dump: * - * 1. hook callback destructor. - */ - cb->args[3] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. + * 1. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; + + /* 2. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + } arg.skb = skb; From 80247e0eca14ff177d565f58ecd3010f6b7910a4 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 25 Mar 2024 17:09:29 -0400 Subject: [PATCH 059/553] mlxbf_gige: stop interface during shutdown commit 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 upstream. The mlxbf_gige driver intermittantly encounters a NULL pointer exception while the system is shutting down via "reboot" command. The mlxbf_driver will experience an exception right after executing its shutdown() method. One example of this exception is: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000070 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000011d373000 [0000000000000070] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] SMP CPU: 0 PID: 13 Comm: ksoftirqd/0 Tainted: G S OE 5.15.0-bf.6.gef6992a #1 Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.0.2.12669 Apr 21 2023 pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige] lr : mlxbf_gige_poll+0x54/0x160 [mlxbf_gige] sp : ffff8000080d3c10 x29: ffff8000080d3c10 x28: ffffcce72cbb7000 x27: ffff8000080d3d58 x26: ffff0000814e7340 x25: ffff331cd1a05000 x24: ffffcce72c4ea008 x23: ffff0000814e4b40 x22: ffff0000814e4d10 x21: ffff0000814e4128 x20: 0000000000000000 x19: ffff0000814e4a80 x18: ffffffffffffffff x17: 000000000000001c x16: ffffcce72b4553f4 x15: ffff80008805b8a7 x14: 0000000000000000 x13: 0000000000000030 x12: 0101010101010101 x11: 7f7f7f7f7f7f7f7f x10: c2ac898b17576267 x9 : ffffcce720fa5404 x8 : ffff000080812138 x7 : 0000000000002e9a x6 : 0000000000000080 x5 : ffff00008de3b000 x4 : 0000000000000000 x3 : 0000000000000001 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige] mlxbf_gige_poll+0x54/0x160 [mlxbf_gige] __napi_poll+0x40/0x1c8 net_rx_action+0x314/0x3a0 __do_softirq+0x128/0x334 run_ksoftirqd+0x54/0x6c smpboot_thread_fn+0x14c/0x190 kthread+0x10c/0x110 ret_from_fork+0x10/0x20 Code: 8b070000 f9000ea0 f95056c0 f86178a1 (b9407002) ---[ end trace 7cc3941aa0d8e6a4 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt Kernel Offset: 0x4ce722520000 from 0xffff800008000000 PHYS_OFFSET: 0x80000000 CPU features: 0x000005c1,a3330e5a Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- During system shutdown, the mlxbf_gige driver's shutdown() is always executed. However, the driver's stop() method will only execute if networking interface configuration logic within the Linux distribution has been setup to do so. If shutdown() executes but stop() does not execute, NAPI remains enabled and this can lead to an exception if NAPI is scheduled while the hardware interface has only been partially deinitialized. The networking interface managed by the mlxbf_gige driver must be properly stopped during system shutdown so that IFF_UP is cleared, the hardware interface is put into a clean state, and NAPI is fully deinitialized. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20240325210929.25362-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 65e92541db6e..d6b4d163bbbf 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "mlxbf_gige.h" @@ -417,8 +418,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev) { struct mlxbf_gige *priv = platform_get_drvdata(pdev); - writeq(0, priv->base + MLXBF_GIGE_INT_EN); - mlxbf_gige_clean_port(priv); + rtnl_lock(); + netif_device_detach(priv->netdev); + + if (netif_running(priv->netdev)) + dev_close(priv->netdev); + + rtnl_unlock(); } static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { From fd6692e9b5edc46a0751e5c9218f1ee7898718c5 Mon Sep 17 00:00:00 2001 From: Atlas Yu Date: Thu, 28 Mar 2024 13:51:52 +0800 Subject: [PATCH 060/553] r8169: skip DASH fw status checks when DASH is disabled commit 5e864d90b20803edf6bd44a99fb9afa7171785f2 upstream. On devices that support DASH, the current code in the "rtl_loop_wait" function raises false alarms when DASH is disabled. This occurs because the function attempts to wait for the DASH firmware to be ready, even though it's not relevant in this case. r8169 0000:0c:00.0 eth0: RTL8168ep/8111ep, 38:7c:76:49:08:d9, XID 502, IRQ 86 r8169 0000:0c:00.0 eth0: jumbo features [frames: 9194 bytes, tx checksumming: ko] r8169 0000:0c:00.0 eth0: DASH disabled ... r8169 0000:0c:00.0 eth0: rtl_ep_ocp_read_cond == 0 (loop: 30, delay: 10000). This patch modifies the driver start/stop functions to skip checking the DASH firmware status when DASH is explicitly disabled. This prevents unnecessary delays and false alarms. The patch has been tested on several ThinkStation P8/PX workstations. Fixes: 0ab0c45d8aae ("r8169: add handling DASH when DASH is disabled") Signed-off-by: Atlas Yu Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20240328055152.18443-1-atlas.yu@canonical.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 31 ++++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 85ed9879af6c..3dbcb311dcbf 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1135,17 +1135,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); } +static void rtl_dash_loop_wait(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long usecs, int n, bool high) +{ + if (!tp->dash_enabled) + return; + rtl_loop_wait(tp, c, usecs, n, high); +} + +static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, true); +} + +static void rtl_dash_loop_wait_low(struct rtl8169_private *tp, + const struct rtl_cond *c, + unsigned long d, int n) +{ + rtl_dash_loop_wait(tp, c, d, n, false); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); - rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_start(struct rtl8169_private *tp) { r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); } static void rtl8168_driver_start(struct rtl8169_private *tp) @@ -1159,7 +1182,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp) static void rtl8168dp_driver_stop(struct rtl8169_private *tp) { r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); - rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); } static void rtl8168ep_driver_stop(struct rtl8169_private *tp) @@ -1167,7 +1190,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) rtl8168ep_stop_cmac(tp); r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); - rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); } static void rtl8168_driver_stop(struct rtl8169_private *tp) From d12245080cb259d82b34699f6cd4ec11bdb688bd Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:33:58 +0100 Subject: [PATCH 061/553] udp: do not accept non-tunnel GSO skbs landing in a tunnel commit 3d010c8031e39f5fa1e8b13ada77e0321091011f upstream. When rx-udp-gro-forwarding is enabled UDP packets might be GROed when being forwarded. If such packets might land in a tunnel this can cause various issues and udp_gro_receive makes sure this isn't the case by looking for a matching socket. This is performed in udp4/6_gro_lookup_skb but only in the current netns. This is an issue with tunneled packets when the endpoint is in another netns. In such cases the packets will be GROed at the UDP level, which leads to various issues later on. The same thing can happen with rx-gro-list. We saw this with geneve packets being GROed at the UDP level. In such case gso_size is set; later the packet goes through the geneve rx path, the geneve header is pulled, the offset are adjusted and frag_list skbs are not adjusted with regard to geneve. When those skbs hit skb_fragment, it will misbehave. Different outcomes are possible depending on what the GROed skbs look like; from corrupted packets to kernel crashes. One example is a BUG_ON[1] triggered in skb_segment while processing the frag_list. Because gso_size is wrong (geneve header was pulled) skb_segment thinks there is "geneve header size" of data in frag_list, although it's in fact the next packet. The BUG_ON itself has nothing to do with the issue. This is only one of the potential issues. Looking up for a matching socket in udp_gro_receive is fragile: the lookup could be extended to all netns (not speaking about performances) but nothing prevents those packets from being modified in between and we could still not find a matching socket. It's OK to keep the current logic there as it should cover most cases but we also need to make sure we handle tunnel packets being GROed too early. This is done by extending the checks in udp_unexpected_gso: GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits and landing in a tunnel must be segmented. [1] kernel BUG at net/core/skbuff.c:4408! RIP: 0010:skb_segment+0xd2a/0xf70 __udp_gso_segment+0xaa/0x560 Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/udp.h | 28 ++++++++++++++++++++++++++++ net/ipv4/udp.c | 7 +++++++ net/ipv4/udp_offload.c | 6 ++++-- net/ipv6/udp.c | 2 +- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/include/linux/udp.h b/include/linux/udp.h index efd9ab6df379..79a4eae6f1f8 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -140,6 +140,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -153,6 +171,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7856b7a3e0ee..2a78c78186c3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -603,6 +603,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b9e638f76753..be3ee7cfa955 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -544,8 +544,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c2c02dea6c38..1775e9b9b85a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -476,7 +476,7 @@ csum_copy_err: goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); From 8c58d384050b99d0aac8dd4c5a0d1c09f0f33152 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:34:00 +0100 Subject: [PATCH 062/553] udp: do not transition UDP GRO fraglist partial checksums to unnecessary commit f0b8c30345565344df2e33a8417a27503589247d upstream. UDP GRO validates checksums and in udp4/6_gro_complete fraglist packets are converted to CHECKSUM_UNNECESSARY to avoid later checks. However this is an issue for CHECKSUM_PARTIAL packets as they can be looped in an egress path and then their partial checksums are not fixed. Different issues can be observed, from invalid checksum on packets to traces like: gen01: hw csum failure skb len=3008 headroom=160 headlen=1376 tailroom=0 mac=(106,14) net=(120,40) trans=160 shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) csum(0xffff232e ip_summed=2 complete_sw=0 valid=0 level=0) hash(0x77e3d716 sw=1 l4=1) proto=0x86dd pkttype=0 iif=12 ... Fix this by only converting CHECKSUM_NONE packets to CHECKSUM_UNNECESSARY by reusing __skb_incr_checksum_unnecessary. All other checksum types are kept as-is, including CHECKSUM_COMPLETE as fraglist packets being segmented back would have their skb->csum valid. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_offload.c | 8 +------- net/ipv6/udp_offload.c | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index be3ee7cfa955..de58415c1f97 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -710,13 +710,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 7720d04ed396..b98c4c8d8e27 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -169,13 +169,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (skb->csum_level < SKB_MAX_CSUM_LEVEL) - skb->csum_level++; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = 0; - } + __skb_incr_checksum_unnecessary(skb); return 0; } From 940ff35ae8d19dc820c3127d39ed391f8fe70b9e Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:34:01 +0100 Subject: [PATCH 063/553] udp: prevent local UDP tunnel packets from being GROed commit 64235eabc4b5b18c507c08a1f16cdac6c5661220 upstream. GRO has a fundamental issue with UDP tunnel packets as it can't detect those in a foolproof way and GRO could happen before they reach the tunnel endpoint. Previous commits have fixed issues when UDP tunnel packets come from a remote host, but if those packets are issued locally they could run into checksum issues. If the inner packet has a partial checksum the information will be lost in the GRO logic, either in udp4/6_gro_complete or in udp_gro_complete_segment and packets will have an invalid checksum when leaving the host. Prevent local UDP tunnel packets from ever being GROed at the outer UDP level. Due to skb->encapsulation being wrongly used in some drivers this is actually only preventing UDP tunnel packets with a partial checksum to be GROed (see iptunnel_handle_offloads) but those were also the packets triggering issues so in practice this should be sufficient. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets") Suggested-by: Paolo Abeni Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_offload.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index de58415c1f97..84b7d6089f76 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -551,6 +551,12 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { + /* If the packet was locally encapsulated in a UDP tunnel that + * wasn't detected above, do not GRO. + */ + if (skb->encapsulation) + goto out; + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; From 39efe5b6f6114247ea72fa8c1ed2faaead784cfe Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 26 Mar 2024 17:51:49 +0530 Subject: [PATCH 064/553] octeontx2-af: Fix issue with loading coalesced KPU profiles commit 0ba80d96585662299d4ea4624043759ce9015421 upstream. The current implementation for loading coalesced KPU profiles has a limitation. The "offset" field, which is used to locate profiles within the profile is restricted to a u16. This restricts the number of profiles that can be loaded. This patch addresses this limitation by increasing the size of the "offset" field. Fixes: 11c730bfbf5b ("octeontx2-af: support for coalescing KPU profiles") Signed-off-by: Hariprasad Kelam Reviewed-by: Kalesh AP Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 55639c133dd0..91a4ea529d07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1669,7 +1669,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, struct npc_coalesced_kpu_prfl *img_data = NULL; int i = 0, rc = -EINVAL; void __iomem *kpu_prfl_addr; - u16 offset; + u32 offset; img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; if (le64_to_cpu(img_data->signature) == KPU_SIGN && From e7e7030f0ae29cdc70ed845144f1cabd509e29d5 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 28 Mar 2024 10:06:21 +0800 Subject: [PATCH 065/553] octeontx2-pf: check negative error code in otx2_open() commit e709acbd84fb6ef32736331b0147f027a3ef4c20 upstream. otx2_rxtx_enable() return negative error code such as -EIO, check -EIO rather than EIO to fix this problem. Fixes: c926252205c4 ("octeontx2-pf: Disable packet I/O for graceful exit") Signed-off-by: Su Hui Reviewed-by: Subbaraya Sundeep Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20240328020620.4054692-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 7e2c30927c31..6b7fb324e756 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1914,7 +1914,7 @@ int otx2_open(struct net_device *netdev) * mcam entries are enabled to receive the packets. Hence disable the * packet I/O. */ - if (err == EIO) + if (err == -EIO) goto err_disable_rxtx; else if (err) goto err_tx_stop_queues; From d417e3c16dc55b40ecd9371c00d3215ca3191552 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Thu, 28 Mar 2024 19:55:05 +0300 Subject: [PATCH 066/553] octeontx2-af: Add array index check commit ef15ddeeb6bee87c044bf7754fac524545bf71e8 upstream. In rvu_map_cgx_lmac_pf() the 'iter', which is used as an array index, can reach value (up to 14) that exceed the size (MAX_LMAC_COUNT = 8) of the array. Fix this bug by adding 'iter' value check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 91c6945ea1f9 ("octeontx2-af: cn10k: Add RPM MAC support") Signed-off-by: Aleksandr Mishin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index d1e6b12ecfa7..cc6d6c94f400 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) continue; lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { + if (iter >= MAX_LMAC_COUNT) + continue; lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); From fe74ea5b8b78d96b9ba313f070b18833a4e32b32 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 13 Mar 2024 10:44:00 +0100 Subject: [PATCH 067/553] i40e: fix i40e_count_filters() to count only active/new filters commit eb58c598ce45b7e787568fe27016260417c3d807 upstream. The bug usually affects untrusted VFs, because they are limited to 18 MACs, it affects them badly, not letting to create MAC all filters. Not stable to reproduce, it happens when VF user creates MAC filters when other MACVLAN operations are happened in parallel. But consequence is that VF can't receive desired traffic. Fix counter to be bumped only for new or active filters. Fixes: 621650cabee5 ("i40e: Refactoring VF MAC filters counting to make more reliable") Signed-off-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Paul Menzel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d8a7fb21b7b7..5be56db1dafd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1249,8 +1249,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) int bkt; int cnt = 0; - hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) - ++cnt; + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_ACTIVE) + ++cnt; + } return cnt; } From 3e89846283f3cf7c7a8e28b342576fd7c561d2ba Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 13 Mar 2024 10:56:39 +0100 Subject: [PATCH 068/553] i40e: fix vf may be used uninitialized in this function warning commit f37c4eac99c258111d414d31b740437e1925b8e8 upstream. To fix the regression introduced by commit 52424f974bc5, which causes servers hang in very hard to reproduce conditions with resets races. Using two sources for the information is the root cause. In this function before the fix bumping v didn't mean bumping vf pointer. But the code used this variables interchangeably, so stale vf could point to different/not intended vf. Remove redundant "v" variable and iterate via single VF pointer across whole function instead to guarantee VF pointer validity. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 99cab36fb107..a5f0c95cba8b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1626,8 +1626,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; - int i, v; u32 reg; + int i; /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) @@ -1638,11 +1638,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is being reset no need to trigger reset again */ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - i40e_trigger_vf_reset(&pf->vf[v], flr); + i40e_trigger_vf_reset(vf, flr); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1651,14 +1650,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) * the VFs using a simple iterator that increments once that VF has * finished resetting. */ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { + for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { usleep_range(10000, 20000); /* Check each VF in sequence, beginning with the VF to fail * the previous check. */ - while (v < pf->num_alloc_vfs) { - vf = &pf->vf[v]; + while (vf < &pf->vf[pf->num_alloc_vfs]) { if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) @@ -1668,7 +1666,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If the current VF has finished resetting, move on * to the next VF in sequence. */ - v++; + ++vf; } } @@ -1678,39 +1676,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* Display a warning if at least one VF didn't manage to reset in * time, but continue on with the operation. */ - if (v < pf->num_alloc_vfs) + if (vf < &pf->vf[pf->num_alloc_vfs]) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - pf->vf[v].vf_id); + vf->vf_id); usleep_range(10000, 20000); /* Begin disabling all the rings associated with VFs, but do not wait * between each VF. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); } /* Now that we've notified HW to disable all of the VF rings, wait * until they finish. */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* On initial reset, we don't have any queues to disable */ - if (pf->vf[v].lan_vsi_idx == 0) + if (vf->lan_vsi_idx == 0) continue; /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); } /* Hw may need up to 50ms to finish disabling the RX queues. We @@ -1719,12 +1717,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) mdelay(50); /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { /* If VF is reset in another thread just continue */ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) continue; - i40e_cleanup_reset_vf(&pf->vf[v]); + i40e_cleanup_reset_vf(vf); } i40e_flush(hw); From 2990d8eacd2ccd545feec181b49867fa0e68591d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 6 Oct 2023 13:50:20 -0500 Subject: [PATCH 069/553] drm/amd: Evict resources during PM ops prepare() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5095d5418193eb2748c7d8553c7150b8f1c44696 ] Linux PM core has a prepare() callback run before suspend. If the system is under high memory pressure, the resources may need to be evicted into swap instead. If the storage backing for swap is offlined during the suspend() step then such a call may fail. So move this step into prepare() to move evict majority of resources and update all non-pmops callers to call the same callback. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Stable-dep-of: ca299b4512d4 ("drm/amd: Flush GFXOFF requests in prepare stage") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e636c7850f77..dd22d2559720 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1342,6 +1342,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6a4749c0c5a5..902a446cc4d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1639,6 +1639,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, } else { pr_info("switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + amdgpu_device_prepare(dev); amdgpu_device_suspend(dev, true); amdgpu_device_cache_pci_state(pdev); /* Shut down the device */ @@ -4167,6 +4168,31 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + /* Evict the majority of BOs before starting suspend sequence */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + + return 0; +} + /** * amdgpu_device_suspend - initiate device suspend * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f24c3a20e901..9a5416331f02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2391,8 +2391,9 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev); + if (amdgpu_device_supports_boco(drm_dev) && + pm_runtime_suspended(dev)) + return 1; /* if we will not support s3 or s2i for the device * then skip suspend @@ -2401,7 +2402,7 @@ static int amdgpu_pmops_prepare(struct device *dev) !amdgpu_acpi_is_s3_active(adev)) return 1; - return 0; + return amdgpu_device_prepare(drm_dev); } static void amdgpu_pmops_complete(struct device *dev) @@ -2600,6 +2601,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_boco(drm_dev)) adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_prepare(drm_dev); + if (ret) + return ret; ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; From 43df8e64dfb86940623bb23b2e20eb09b1791eaf Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 6 Oct 2023 13:50:21 -0500 Subject: [PATCH 070/553] drm/amd: Add concept of running prepare_suspend() sequence for IP blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb11ca3233aa3303dc11dca25977d2e7f24be00f ] If any IP blocks allocate memory during their hw_fini() sequence this can cause the suspend to fail under memory pressure. Introduce a new phase that IP blocks can use to allocate memory before suspend starts so that it can potentially be evicted into swap instead. Reviewed-by: Christian König Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Stable-dep-of: ca299b4512d4 ("drm/amd: Flush GFXOFF requests in prepare stage") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++++++++- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 902a446cc4d3..77e35b919b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4180,7 +4180,7 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) int amdgpu_device_prepare(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); - int r; + int i, r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -4190,6 +4190,16 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) return r; + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (!adev->ip_blocks[i].version->funcs->prepare_suspend) + continue; + r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); + if (r) + return r; + } + return 0; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index f175e65b853a..34467427c9f9 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -294,6 +294,7 @@ struct amd_ip_funcs { int (*hw_init)(void *handle); int (*hw_fini)(void *handle); void (*late_fini)(void *handle); + int (*prepare_suspend)(void *handle); int (*suspend)(void *handle); int (*resume)(void *handle); bool (*is_idle)(void *handle); From 810dd068ae3b3973b769bd52866ca3b9286b5254 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 20 Mar 2024 13:32:21 -0500 Subject: [PATCH 071/553] drm/amd: Flush GFXOFF requests in prepare stage [ Upstream commit ca299b4512d4b4f516732a48ce9aa19d91f4473e ] If the system hasn't entered GFXOFF when suspend starts it can cause hangs accessing GC and RLC during the suspend stage. Cc: # 6.1.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.1.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.1.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.1.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.6.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.6.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.6.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.6.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.1+ Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132 Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 77e35b919b06..b11690a816e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4190,6 +4190,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) return r; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; From f3c2ceb847595d6c9f2b8bb9c786ba8027dbf1f6 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 7 Oct 2022 14:38:40 -0700 Subject: [PATCH 072/553] i40e: Store the irq number in i40e_q_vector [ Upstream commit 6b85a4f39ff7177b2428d4deab1151a31754e391 ] Make it easy to figure out the IRQ number for a particular i40e_q_vector by storing the assigned IRQ in the structure itself. Signed-off-by: Joe Damato Acked-by: Jesse Brandeburg Acked-by: Sridhar Samudrala Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Stable-dep-of: ea558de7238b ("i40e: Enforce software interrupt during busy-poll exit") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 7d4cc4eafd59..59c4e9d64298 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -992,6 +992,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5be56db1dafd..ee0d7c29e8f1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4145,6 +4145,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) } /* register for affinity change notifications */ + q_vector->irq_num = irq_num; q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); From 2f6953617d1c887524fbd66398b1d0a5efb1c289 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 13 Nov 2023 15:10:24 -0800 Subject: [PATCH 073/553] i40e: Remove _t suffix from enum type names [ Upstream commit addca9175e5f74cf29e8ad918c38c09b8663b5b8 ] Enum type names should not be suffixed by '_t'. Either to use 'typedef enum name name_t' to so plain 'name_t var' instead of 'enum name_t var'. Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20231113231047.548659-6-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Stable-dep-of: ea558de7238b ("i40e: Enforce software interrupt during busy-poll exit") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 6 +++--- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 59c4e9d64298..35862dedd59b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -108,7 +108,7 @@ #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ /* driver state flags */ -enum i40e_state_t { +enum i40e_state { __I40E_TESTING, __I40E_CONFIG_BUSY, __I40E_CONFIG_DONE, @@ -156,7 +156,7 @@ enum i40e_state_t { BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ -enum i40e_vsi_state_t { +enum i40e_vsi_state { __I40E_VSI_DOWN, __I40E_VSI_NEEDS_RESTART, __I40E_VSI_SYNCING_FILTERS, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 97a9efe7b713..5f2555848a69 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -34,7 +34,7 @@ enum i40e_ptp_pin { GPIO_4 }; -enum i40e_can_set_pins_t { +enum i40e_can_set_pins { CANT_DO_PINS = -1, CAN_SET_PINS, CAN_DO_PINS @@ -192,7 +192,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) * return CAN_DO_PINS if pins can be manipulated within a NIC or * return CANT_DO_PINS otherwise. **/ -static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) { if (!i40e_is_ptp_pin_dev(&pf->hw)) { dev_warn(&pf->pdev->dev, @@ -1081,7 +1081,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) static int i40e_ptp_set_pins(struct i40e_pf *pf, struct i40e_ptp_pins_settings *pins) { - enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); int i = 0; if (pin_caps == CANT_DO_PINS) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 768290dc6f48..602ddd956245 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -57,7 +57,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any * register but instead is a special value meaning "don't update" ITR0/1/2. */ -enum i40e_dyn_idx_t { +enum i40e_dyn_idx { I40E_IDX_ITR0 = 0, I40E_IDX_ITR1 = 1, I40E_IDX_ITR2 = 2, @@ -304,7 +304,7 @@ struct i40e_rx_queue_stats { u64 page_busy_count; }; -enum i40e_ring_state_t { +enum i40e_ring_state { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, __I40E_RING_STATE_NBITS /* must be last */ From 095cfa2d9bde4ddf444fdf208ac3ace598a439de Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 16 Mar 2024 12:38:29 +0100 Subject: [PATCH 074/553] i40e: Enforce software interrupt during busy-poll exit [ Upstream commit ea558de7238bb12c3435c47f0631e9d17bf4a09f ] As for ice bug fixed by commit b7306b42beaf ("ice: manage interrupts during poll exit") followed by commit 23be7075b318 ("ice: fix software generating extra interrupts") I'm seeing the similar issue also with i40e driver. In certain situation when busy-loop is enabled together with adaptive coalescing, the driver occasionally misses that there are outstanding descriptors to clean when exiting busy poll. Try to catch the remaining work by triggering a software interrupt when exiting busy poll. No extra interrupts will be generated when busy polling is not used. The issue was found when running sockperf ping-pong tcp test with adaptive coalescing and busy poll enabled (50 as value busy_pool and busy_read sysctl knobs) and results in huge latency spikes with more than 100000us. The fix is inspired from the ice driver and do the following: 1) During napi poll exit in case of busy-poll (napo_complete_done() returns false) this is recorded to q_vector that we were in busy loop. 2) Extends i40e_buildreg_itr() to be able to add an enforced software interrupt into built value 2) In i40e_update_enable_itr() enforces a software interrupt trigger if we are exiting busy poll to catch any pending clean-ups 3) Reuses unused 3rd ITR (interrupt throttle) index and set it to 20K interrupts per second to limit the number of these sw interrupts. Test results ============ Prior: [root@dell-per640-07 net]# sockperf ping-pong -i 10.9.9.1 --tcp -m 1000 --mps=max -t 120 sockperf: == version #3.10-no.git == sockperf[CLIENT] send on:sockperf: using recvfrom() to block on socket(s) [ 0] IP = 10.9.9.1 PORT = 11111 # TCP sockperf: Warmup stage (sending a few dummy messages)... sockperf: Starting test... sockperf: Test end (interrupted by timer) sockperf: Test ended sockperf: [Total Run] RunTime=119.999 sec; Warm up time=400 msec; SentMessages=2438563; ReceivedMessages=2438562 sockperf: ========= Printing statistics for Server No: 0 sockperf: [Valid Duration] RunTime=119.549 sec; SentMessages=2429473; ReceivedMessages=2429473 sockperf: ====> avg-latency=24.571 (std-dev=93.297, mean-ad=4.904, median-ad=1.510, siqr=1.063, cv=3.797, std-error=0.060, 99.0% ci=[24.417, 24.725]) sockperf: # dropped messages = 0; # duplicated messages = 0; # out-of-order messages = 0 sockperf: Summary: Latency is 24.571 usec sockperf: Total 2429473 observations; each percentile contains 24294.73 observations sockperf: ---> observation = 103294.331 sockperf: ---> percentile 99.999 = 45.633 sockperf: ---> percentile 99.990 = 37.013 sockperf: ---> percentile 99.900 = 35.910 sockperf: ---> percentile 99.000 = 33.390 sockperf: ---> percentile 90.000 = 28.626 sockperf: ---> percentile 75.000 = 27.741 sockperf: ---> percentile 50.000 = 26.743 sockperf: ---> percentile 25.000 = 25.614 sockperf: ---> observation = 12.220 After: [root@dell-per640-07 net]# sockperf ping-pong -i 10.9.9.1 --tcp -m 1000 --mps=max -t 120 sockperf: == version #3.10-no.git == sockperf[CLIENT] send on:sockperf: using recvfrom() to block on socket(s) [ 0] IP = 10.9.9.1 PORT = 11111 # TCP sockperf: Warmup stage (sending a few dummy messages)... sockperf: Starting test... sockperf: Test end (interrupted by timer) sockperf: Test ended sockperf: [Total Run] RunTime=119.999 sec; Warm up time=400 msec; SentMessages=2400055; ReceivedMessages=2400054 sockperf: ========= Printing statistics for Server No: 0 sockperf: [Valid Duration] RunTime=119.549 sec; SentMessages=2391186; ReceivedMessages=2391186 sockperf: ====> avg-latency=24.965 (std-dev=5.934, mean-ad=4.642, median-ad=1.485, siqr=1.067, cv=0.238, std-error=0.004, 99.0% ci=[24.955, 24.975]) sockperf: # dropped messages = 0; # duplicated messages = 0; # out-of-order messages = 0 sockperf: Summary: Latency is 24.965 usec sockperf: Total 2391186 observations; each percentile contains 23911.86 observations sockperf: ---> observation = 195.841 sockperf: ---> percentile 99.999 = 45.026 sockperf: ---> percentile 99.990 = 39.009 sockperf: ---> percentile 99.900 = 35.922 sockperf: ---> percentile 99.000 = 33.482 sockperf: ---> percentile 90.000 = 28.902 sockperf: ---> percentile 75.000 = 27.821 sockperf: ---> percentile 50.000 = 26.860 sockperf: ---> percentile 25.000 = 25.685 sockperf: ---> observation = 12.277 Fixes: 0bcd952feec7 ("ethernet/intel: consolidate NAPI and NAPI exit") Reported-by: Hugo Ferreira Reviewed-by: Michal Schmidt Signed-off-by: Ivan Vecera Reviewed-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++ .../net/ethernet/intel/i40e/i40e_register.h | 3 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 82 ++++++++++++++----- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 + 5 files changed, 72 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 35862dedd59b..5293fc00938c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -992,6 +992,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + bool in_busy_poll; int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ee0d7c29e8f1..a9db1ed74d3f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3891,6 +3891,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; + /* Set ITR for software interrupts triggered after exiting + * busy-loop polling. + */ + wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), + I40E_ITR_20K); + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 7339003aa17c..694cb3e45c1e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -328,8 +328,11 @@ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 +#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 +#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 94cf82668efa..3d83fccf742b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2571,7 +2571,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static inline u32 i40e_buildreg_itr(const int type, u16 itr) +/** + * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register + * @itr_idx: interrupt throttling index + * @interval: interrupt throttling interval value in usecs + * @force_swint: force software interrupt + * + * The function builds a value for I40E_PFINT_DYN_CTLN register that + * is used to update interrupt throttling interval for specified ITR index + * and optionally enforces a software interrupt. If the @itr_idx is equal + * to I40E_ITR_NONE then no interval change is applied and only @force_swint + * parameter is taken into account. If the interval change and enforced + * software interrupt are not requested then the built value just enables + * appropriate vector interrupt. + **/ +static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, + bool force_swint) { u32 val; @@ -2585,23 +2600,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) * an event in the PBA anyway so we need to rely on the automask * to hold pending events for us until the interrupt is re-enabled * - * The itr value is reported in microseconds, and the register - * value is recorded in 2 microsecond units. For this reason we - * only need to shift by the interval shift - 1 instead of the - * full value. + * We have to shift the given value as it is reported in microseconds + * and the register value is recorded in 2 microsecond units. */ - itr &= I40E_ITR_MASK; + interval >>= 1; + /* 1. Enable vector interrupt + * 2. Update the interval for the specified ITR index + * (I40E_ITR_NONE in the register is used to indicate that + * no interval update is requested) + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); + FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | + FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); + + /* 3. Enforce software interrupt trigger if requested + * (These software interrupts rate is limited by ITR2 that is + * set to 20K interrupts per second) + */ + if (force_swint) + val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | + I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | + FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, + I40E_SW_ITR); return val; } -/* a small macro to shorten up some long lines */ -#define INTREG I40E_PFINT_DYN_CTLN - /* The act of updating the ITR will cause it to immediately trigger. In order * to prevent this from throwing off adaptive update statistics we defer the * update so that it can only happen so often. So after either Tx or Rx are @@ -2620,8 +2645,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { + enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; struct i40e_hw *hw = &vsi->back->hw; - u32 intval; + u16 interval = 0; + u32 itr_val; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2643,8 +2670,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ if (q_vector->rx.target_itr < q_vector->rx.current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || @@ -2653,25 +2680,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - intval = i40e_buildreg_itr(I40E_TX_ITR, - q_vector->tx.target_itr); + itr_idx = I40E_TX_ITR; + interval = q_vector->tx.target_itr; q_vector->tx.current_itr = q_vector->tx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { /* Rx ITR needs to be increased, third priority */ - intval = i40e_buildreg_itr(I40E_RX_ITR, - q_vector->rx.target_itr); + itr_idx = I40E_RX_ITR; + interval = q_vector->rx.target_itr; q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* No ITR update, lowest priority */ - intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown) q_vector->itr_countdown--; } - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), intval); + /* Do not update interrupt control register if VSI is down */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + /* Update ITR interval if necessary and enforce software interrupt + * if we are exiting busy poll. + */ + if (q_vector->in_busy_poll) { + itr_val = i40e_buildreg_itr(itr_idx, interval, true); + q_vector->in_busy_poll = false; + } else { + itr_val = i40e_buildreg_itr(itr_idx, interval, false); + } + wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); } /** @@ -2778,6 +2816,8 @@ tx_only: */ if (likely(napi_complete_done(napi, work_done))) i40e_update_enable_itr(vsi, q_vector); + else + q_vector->in_busy_poll = true; return min(work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 602ddd956245..6e567d343e03 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -67,6 +67,7 @@ enum i40e_dyn_idx { /* these are indexes into ITRN registers */ #define I40E_RX_ITR I40E_IDX_ITR0 #define I40E_TX_ITR I40E_IDX_ITR1 +#define I40E_SW_ITR I40E_IDX_ITR2 /* Supported RSS offloads */ #define I40E_DEFAULT_RSS_HENA ( \ From a33b7cb184921487ae7b8b739bcdc7d0266d2eb7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:23:15 +0100 Subject: [PATCH 075/553] r8169: use spinlock to protect mac ocp register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 91c8643578a21e435c412ffbe902bb4b4773e262 ] For disabling ASPM during NAPI poll we'll have to access mac ocp registers in atomic context. This could result in races because a mac ocp read consists of a write to register OCPDR, followed by a read from the same register. Therefore add a spinlock to protect access to mac ocp registers. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Stable-dep-of: 5e864d90b208 ("r8169: skip DASH fw status checks when DASH is disabled") Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 37 ++++++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 3dbcb311dcbf..c7dd0eb94817 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -615,6 +615,8 @@ struct rtl8169_private { struct work_struct work; } wk; + spinlock_t mac_ocp_lock; + unsigned supports_gmii:1; unsigned aspm_manageable:1; unsigned dash_enabled:1; @@ -850,7 +852,7 @@ static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) (RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT; } -static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) +static void __r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) { if (rtl_ocp_reg_failure(reg)) return; @@ -858,7 +860,16 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data); } -static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) +static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) +{ + unsigned long flags; + + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + __r8168_mac_ocp_write(tp, reg, data); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); +} + +static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) { if (rtl_ocp_reg_failure(reg)) return 0; @@ -868,12 +879,28 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) return RTL_R32(tp, OCPDR); } +static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) +{ + unsigned long flags; + u16 val; + + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + val = __r8168_mac_ocp_read(tp, reg); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); + + return val; +} + static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask, u16 set) { - u16 data = r8168_mac_ocp_read(tp, reg); + unsigned long flags; + u16 data; - r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); + spin_lock_irqsave(&tp->mac_ocp_lock, flags); + data = __r8168_mac_ocp_read(tp, reg); + __r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); + spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); } /* Work around a hw issue with RTL8168g PHY, the quirk disables @@ -5232,6 +5259,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->mac_ocp_lock); + dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, struct pcpu_sw_netstats); if (!dev->tstats) From 9109472e7102f58b915e473dabc8b7961a39b3f7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:24:00 +0100 Subject: [PATCH 076/553] r8169: use spinlock to protect access to registers Config2 and Config5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6bc6c4e6893ee79a9862c61d1635e7da6d5a3333 ] For disabling ASPM during NAPI poll we'll have to access both registers in atomic context. Use a spinlock to protect access. Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Stable-dep-of: 5e864d90b208 ("r8169: skip DASH fw status checks when DASH is disabled") Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 47 ++++++++++++++++++----- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c7dd0eb94817..4a1710b2726c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -615,6 +615,7 @@ struct rtl8169_private { struct work_struct work; } wk; + spinlock_t config25_lock; spinlock_t mac_ocp_lock; unsigned supports_gmii:1; @@ -680,6 +681,28 @@ static void rtl_pci_commit(struct rtl8169_private *tp) RTL_R8(tp, ChipCmd); } +static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->config25_lock, flags); + val = RTL_R8(tp, Config2); + RTL_W8(tp, Config2, (val & ~clear) | set); + spin_unlock_irqrestore(&tp->config25_lock, flags); +} + +static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set) +{ + unsigned long flags; + u8 val; + + spin_lock_irqsave(&tp->config25_lock, flags); + val = RTL_R8(tp, Config5); + RTL_W8(tp, Config5, (val & ~clear) | set); + spin_unlock_irqrestore(&tp->config25_lock, flags); +} + static bool rtl_is_8125(struct rtl8169_private *tp) { return tp->mac_version >= RTL_GIGA_MAC_VER_61; @@ -1401,6 +1424,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) { WAKE_MAGIC, Config3, MagicPacket } }; unsigned int i, tmp = ARRAY_SIZE(cfg); + unsigned long flags; u8 options; rtl_unlock_config_regs(tp); @@ -1419,12 +1443,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0); } + spin_lock_irqsave(&tp->config25_lock, flags); for (i = 0; i < tmp; i++) { options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask; if (wolopts & cfg[i].opt) options |= cfg[i].mask; RTL_W8(tp, cfg[i].reg, options); } + spin_unlock_irqrestore(&tp->config25_lock, flags); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: @@ -1436,10 +1462,10 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63: - options = RTL_R8(tp, Config2) & ~PME_SIGNAL; if (wolopts) - options |= PME_SIGNAL; - RTL_W8(tp, Config2, options); + rtl_mod_config2(tp, 0, PME_SIGNAL); + else + rtl_mod_config2(tp, PME_SIGNAL, 0); break; default: break; @@ -2748,8 +2774,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { - RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); - RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); + rtl_mod_config5(tp, 0, ASPM_en); + rtl_mod_config2(tp, 0, ClkReqEn); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: @@ -2772,8 +2798,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) break; } - RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en); + rtl_mod_config2(tp, ClkReqEn, 0); + rtl_mod_config5(tp, ASPM_en, 0); } udelay(10); @@ -2934,7 +2960,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); } static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) @@ -2967,7 +2993,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -2990,7 +3016,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); - RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + rtl_mod_config5(tp, Spi_en, 0); rtl8168_config_eee_mac(tp); } @@ -5259,6 +5285,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + spin_lock_init(&tp->config25_lock); spin_lock_init(&tp->mac_ocp_lock); dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, From 77db987b47b7c93d6e7eadec5c211f3afcea42dd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 6 Mar 2023 22:25:49 +0100 Subject: [PATCH 077/553] r8169: prepare rtl_hw_aspm_clkreq_enable for usage in atomic context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 49ef7d846d4bd77b0b9f1f801fc765b004690a07 ] Bail out if the function is used with chip versions that don't support ASPM configuration. In addition remove the delay, it tuned out that it's not needed, also vendor driver r8125 doesn't have it. Suggested-by: Kai-Heng Feng Reviewed-by: Simon Horman Tested-by: Kai-Heng Feng Tested-by: Holger Hoffstätte Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Stable-dep-of: 5e864d90b208 ("r8169: skip DASH fw status checks when DASH is disabled") Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4a1710b2726c..256630f57ffe 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2772,6 +2772,9 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp) static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { + if (tp->mac_version < RTL_GIGA_MAC_VER_32) + return; + /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { rtl_mod_config5(tp, 0, ASPM_en); @@ -2801,8 +2804,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config2(tp, ClkReqEn, 0); rtl_mod_config5(tp, ASPM_en, 0); } - - udelay(10); } static void rtl_set_fifo_size(struct rtl8169_private *tp, u16 rx_stat, From 4eed9d0a4816d81bf6b009a8d3a05f3817b9463e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 26 Mar 2024 13:42:45 -0700 Subject: [PATCH 078/553] tcp: Fix bind() regression for v6-only wildcard and v4(-mapped-v6) non-wildcard addresses. [ Upstream commit d91ef1e1b55f730bee8ce286b02b7bdccbc42973 ] Jianguo Wu reported another bind() regression introduced by bhash2. Calling bind() for the following 3 addresses on the same port, the 3rd one should fail but now succeeds. 1. 0.0.0.0 or ::ffff:0.0.0.0 2. [::] w/ IPV6_V6ONLY 3. IPv4 non-wildcard address or v4-mapped-v6 non-wildcard address The first two bind() create tb2 like this: bhash2 -> tb2(:: w/ IPV6_V6ONLY) -> tb2(0.0.0.0) The 3rd bind() will match with the IPv6 only wildcard address bucket in inet_bind2_bucket_match_addr_any(), however, no conflicting socket exists in the bucket. So, inet_bhash2_conflict() will returns false, and thus, inet_bhash2_addr_any_conflict() returns false consequently. As a result, the 3rd bind() bypasses conflict check, which should be done against the IPv4 wildcard address bucket. So, in inet_bhash2_addr_any_conflict(), we must iterate over all buckets. Note that we cannot add ipv6_only flag for inet_bind2_bucket as it would confuse the following patetrn. 1. [::] w/ SO_REUSE{ADDR,PORT} and IPV6_V6ONLY 2. [::] w/ SO_REUSE{ADDR,PORT} 3. IPv4 non-wildcard address or v4-mapped-v6 non-wildcard address The first bind() would create a bucket with ipv6_only flag true, the second bind() would add the [::] socket into the same bucket, and the third bind() could succeed based on the wrong assumption that ipv6_only bucket would not conflict with v4(-mapped-v6) address. Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Diagnosed-by: Jianguo Wu Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240326204251.51301-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f7832d425382..8407098a5939 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l struct sock_reuseport *reuseport_cb; struct inet_bind_hashbucket *head2; struct inet_bind2_bucket *tb2; + bool conflict = false; bool reuseport_cb_ok; rcu_read_lock(); @@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l spin_lock(&head2->lock); - inet_bind_bucket_for_each(tb2, &head2->chain) - if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) - break; + inet_bind_bucket_for_each(tb2, &head2->chain) { + if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) + continue; - if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok)) { - spin_unlock(&head2->lock); - return true; + if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; + + conflict = true; + break; } spin_unlock(&head2->lock); - return false; + + return conflict; } /* From c1781222079cac0510a276194ca0a18fe12b8a80 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Thu, 27 Oct 2022 21:45:02 +0300 Subject: [PATCH 079/553] drivers: net: convert to boolean for the mac_managed_pm flag [ Upstream commit eca485d22165695587bed02d8b9d0f7f44246c4a ] Signed-off-by: Dennis Kirjanov Signed-off-by: David S. Miller Stable-dep-of: cbc17e7802f5 ("net: fec: Set mac_managed_pm during probe") Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/realtek/r8169_main.c | 2 +- drivers/net/usb/asix_devices.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 97d12c7eea77..51eb30c20c7c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2236,7 +2236,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = 1; + phy_dev->mac_managed_pm = true; phy_attached_info(phy_dev); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 256630f57ffe..6e3417712e40 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5148,7 +5148,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp) return -EUNATCH; } - tp->phydev->mac_managed_pm = 1; + tp->phydev->mac_managed_pm = true; phy_support_asym_pause(tp->phydev); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6eacbf17f1c0..34cd568b27f1 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -714,7 +714,7 @@ static int ax88772_init_phy(struct usbnet *dev) } phy_suspend(priv->phydev); - priv->phydev->mac_managed_pm = 1; + priv->phydev->mac_managed_pm = true; phy_attached_info(priv->phydev); @@ -734,7 +734,7 @@ static int ax88772_init_phy(struct usbnet *dev) return -ENODEV; } - priv->phydev_int->mac_managed_pm = 1; + priv->phydev_int->mac_managed_pm = true; phy_suspend(priv->phydev_int); return 0; From b3608fe28fab298ee2bcdde26bdaf9c179e8c69f Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 28 Mar 2024 15:59:29 +0000 Subject: [PATCH 080/553] net: fec: Set mac_managed_pm during probe [ Upstream commit cbc17e7802f5de37c7c262204baadfad3f7f99e5 ] Setting mac_managed_pm during interface up is too late. In situations where the link is not brought up yet and the system suspends the regular PHY power management will run. Since the FEC ETHEREN control bit is cleared (automatically) on suspend the controller is off in resume. When the regular PHY power management resume path runs in this context it will write to the MII_DATA register but nothing will be transmitted on the MDIO bus. This can be observed by the following log: fec 5b040000.ethernet eth0: MDIO read timeout Microchip LAN87xx T1 5b040000.ethernet-1:04: PM: dpm_run_callback(): mdio_bus_phy_resume+0x0/0xc8 returns -110 Microchip LAN87xx T1 5b040000.ethernet-1:04: PM: failed to resume: error -110 The data written will however remain in the MII_DATA register. When the link later is set to administrative up it will trigger a call to fec_restart() which will restore the MII_SPEED register. This triggers the quirk explained in f166f890c8f0 ("net: ethernet: fec: Replace interrupt driven MDIO with polled IO") causing an extra MII_EVENT. This extra event desynchronizes all the MDIO register reads, causing them to complete too early. Leading all reads to read as 0 because fec_enet_mdio_wait() returns too early. When a Microchip LAN8700R PHY is connected to the FEC, the 0 reads causes the PHY to be initialized incorrectly and the PHY will not transmit any ethernet signal in this state. It cannot be brought out of this state without a power cycle of the PHY. Fixes: 557d5dc83f68 ("net: fec: use mac-managed PHY PM") Closes: https://lore.kernel.org/netdev/1f45bdbe-eab1-4e59-8f24-add177590d27@actia.se/ Signed-off-by: Wei Fang [jernberg: commit message] Signed-off-by: John Ernberg Link: https://lore.kernel.org/r/20240328155909.59613-2-john.ernberg@actia.se Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 51eb30c20c7c..ebff14b0837d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2236,8 +2236,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - phy_dev->mac_managed_pm = true; - phy_attached_info(phy_dev); return 0; @@ -2249,10 +2247,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); bool suppress_preamble = false; + struct phy_device *phydev; struct device_node *node; int err = -ENXIO; u32 mii_speed, holdtime; u32 bus_freq; + int addr; /* * The i.MX28 dual fec interfaces are not equal. @@ -2362,6 +2362,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) goto err_out_free_mdiobus; of_node_put(node); + /* find all the PHY devices on the bus and set mac_managed_pm to true */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + phydev = mdiobus_get_phy(fep->mii_bus, addr); + if (phydev) + phydev->mac_managed_pm = true; + } + mii_cnt++; /* save fec0 mii_bus */ From 265a0fc55f137823c10005c4bce2d235d3a4f4ab Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 2 Feb 2024 10:41:22 +0200 Subject: [PATCH 081/553] net: ravb: Let IP-specific receive function to interrogate descriptors [ Upstream commit 2b993bfdb47b3aaafd8fe9cd5038b5e297b18ee1 ] ravb_poll() initial code used to interrogate the first descriptor of the RX queue in case gPTP is false to determine if ravb_rx() should be called. This is done for non-gPTP IPs. For gPTP IPs the driver PTP-specific information was used to determine if receive function should be called. As every IP has its own receive function that interrogates the RX descriptors list in the same way the ravb_poll() was doing there is no need to double check this in ravb_poll(). Removing the code from ravb_poll() leads to a cleaner code. Signed-off-by: Claudiu Beznea Reviewed-by: Sergey Shtylyov Signed-off-by: Paolo Abeni Stable-dep-of: 596a4254915f ("net: ravb: Always process TX descriptor ring") Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index e7b70006261f..36e2718c564a 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1290,25 +1290,16 @@ static int ravb_poll(struct napi_struct *napi, int budget) struct net_device *ndev = napi->dev; struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - bool gptp = info->gptp || info->ccc_gac; - struct ravb_rx_desc *desc; unsigned long flags; int q = napi - priv->napi; int mask = BIT(q); int quota = budget; - unsigned int entry; - if (!gptp) { - entry = priv->cur_rx[q] % priv->num_rx_ring[q]; - desc = &priv->gbeth_rx_ring[entry]; - } /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (gptp || desc->die_dt != DT_FEMPTY) { - if (ravb_rx(ndev, "a, q)) - goto out; - } + if (ravb_rx(ndev, "a, q)) + goto out; /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); From f9690dfa181434d828f7583e83d3176116f88566 Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Tue, 2 Apr 2024 15:53:04 +0100 Subject: [PATCH 082/553] net: ravb: Always process TX descriptor ring [ Upstream commit 596a4254915f94c927217fe09c33a6828f33fb25 ] The TX queue should be serviced each time the poll function is called, even if the full RX work budget has been consumed. This prevents starvation of the TX queue when RX bandwidth usage is high. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Paul Barker Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240402145305.82148-1-paul.barker.ct@bp.renesas.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36e2718c564a..b11cc365d19e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1294,12 +1294,12 @@ static int ravb_poll(struct napi_struct *napi, int budget) int q = napi - priv->napi; int mask = BIT(q); int quota = budget; + bool unmask; /* Processing RX Descriptor Ring */ /* Clear RX interrupt */ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, "a, q)) - goto out; + unmask = !ravb_rx(ndev, "a, q); /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); @@ -1309,6 +1309,9 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + if (!unmask) + goto out; + napi_complete(napi); /* Re-enable RX/TX interrupts */ From f089d4554a0b22860bfb1b8392f1aed32033245f Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Tue, 2 Apr 2024 15:53:05 +0100 Subject: [PATCH 083/553] net: ravb: Always update error counters [ Upstream commit 101b76418d7163240bc74a7e06867dca0e51183e ] The error statistics should be updated each time the poll function is called, even if the full RX work budget has been consumed. This prevents the counts from becoming stuck when RX bandwidth usage is high. This also ensures that error counters are not updated after we've re-enabled interrupts as that could result in a race condition. Also drop an unnecessary space. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Paul Barker Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240402145305.82148-2-paul.barker.ct@bp.renesas.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b11cc365d19e..756ac4a07f60 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1309,6 +1309,15 @@ static int ravb_poll(struct napi_struct *napi, int budget) netif_wake_subqueue(ndev, q); spin_unlock_irqrestore(&priv->lock, flags); + /* Receive error message handling */ + priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; + if (info->nc_queues) + priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; + if (priv->rx_over_errors != ndev->stats.rx_over_errors) + ndev->stats.rx_over_errors = priv->rx_over_errors; + if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) + ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; + if (!unmask) goto out; @@ -1325,14 +1334,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) } spin_unlock_irqrestore(&priv->lock, flags); - /* Receive error message handling */ - priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; - if (info->nc_queues) - priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; - if (priv->rx_over_errors != ndev->stats.rx_over_errors) - ndev->stats.rx_over_errors = priv->rx_over_errors; - if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) - ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; out: return budget - quota; } From 2f7efda53a0a8b462fdd217e732147d7611befe2 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 22 May 2023 18:12:48 +0200 Subject: [PATCH 084/553] KVM: SVM: enhance info printk's in SEV init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d1bc9754b04075d938b47cf7f7800814b8911a7 ] Let's print available ASID ranges for SEV/SEV-ES guests. This information can be useful for system administrator to debug if SEV/SEV-ES fails to enable. There are a few reasons. SEV: - NPT is disabled (module parameter) - CPU lacks some features (sev, decodeassists) - Maximum SEV ASID is 0 SEV-ES: - mmio_caching is disabled (module parameter) - CPU lacks sev_es feature - Minimum SEV ASID value is 1 (can be adjusted in BIOS/UEFI) Cc: Sean Christopherson Cc: Paolo Bonzini Cc: Stéphane Graber Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20230522161249.800829-3-aleksandr.mikhalitsyn@canonical.com [sean: print '0' for min SEV-ES ASID if there are no available ASIDs] Signed-off-by: Sean Christopherson Stable-dep-of: 0aa6b90ef9d7 ("KVM: SVM: Add support for allowing zero SEV ASIDs") Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3dc0ee1fe9db..1fe9257d87b2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2217,7 +2217,6 @@ void __init sev_hardware_setup(void) if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)) goto out; - pr_info("SEV supported: %u ASIDs\n", sev_asid_count); sev_supported = true; /* SEV-ES support requested? */ @@ -2245,10 +2244,18 @@ void __init sev_hardware_setup(void) if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)) goto out; - pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count); sev_es_supported = true; out: + if (boot_cpu_has(X86_FEATURE_SEV)) + pr_info("SEV %s (ASIDs %u - %u)\n", + sev_supported ? "enabled" : "disabled", + min_sev_asid, max_sev_asid); + if (boot_cpu_has(X86_FEATURE_SEV_ES)) + pr_info("SEV-ES %s (ASIDs %u - %u)\n", + sev_es_supported ? "enabled" : "disabled", + min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); + sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; #endif From 815c2a1c432b072c7cb9f9fde69cbf093ecd34b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 6 Jun 2023 17:44:49 -0700 Subject: [PATCH 085/553] KVM: SVM: WARN, but continue, if misc_cg_set_capacity() fails [ Upstream commit 106ed2cad9f7bd803bd31a18fe7a9219b077bf95 ] WARN and continue if misc_cg_set_capacity() fails, as the only scenario in which it can fail is if the specified resource is invalid, which should never happen when CONFIG_KVM_AMD_SEV=y. Deliberately not bailing "fixes" a theoretical bug where KVM would leak the ASID bitmaps on failure, which again can't happen. If the impossible should happen, the end result is effectively the same with respect to SEV and SEV-ES (they are unusable), while continuing on has the advantage of letting KVM load, i.e. userspace can still run non-SEV guests. Reported-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20230607004449.1421131-1-seanjc@google.com Signed-off-by: Sean Christopherson Stable-dep-of: 0aa6b90ef9d7 ("KVM: SVM: Add support for allowing zero SEV ASIDs") Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1fe9257d87b2..0316fdf5040f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2214,9 +2214,7 @@ void __init sev_hardware_setup(void) } sev_asid_count = max_sev_asid - min_sev_asid + 1; - if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)) - goto out; - + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); sev_supported = true; /* SEV-ES support requested? */ @@ -2241,9 +2239,7 @@ void __init sev_hardware_setup(void) goto out; sev_es_asid_count = min_sev_asid - 1; - if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)) - goto out; - + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)); sev_es_supported = true; out: From 2233bd583cb5e9175c75d54686d77bbdd56e09e2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Jan 2024 15:56:07 -0800 Subject: [PATCH 086/553] KVM: SVM: Use unsigned integers when dealing with ASIDs [ Upstream commit 466eec4a22a76c462781bf6d45cb02cbedf21a61 ] Convert all local ASID variables and parameters throughout the SEV code from signed integers to unsigned integers. As ASIDs are fundamentally unsigned values, and the global min/max variables are appropriately unsigned integers, too. Functionally, this is a glorified nop as KVM guarantees min_sev_asid is non-zero, and no CPU supports -1u as the _only_ asid, i.e. the signed vs. unsigned goof won't cause problems in practice. Opportunistically use sev_get_asid() in sev_flush_encrypted_page() instead of open coding an equivalent. Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240131235609.4161407-3-seanjc@google.com Signed-off-by: Sean Christopherson Stable-dep-of: 0aa6b90ef9d7 ("KVM: SVM: Add support for allowing zero SEV ASIDs") Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 18 ++++++++++-------- arch/x86/kvm/trace.h | 10 +++++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0316fdf5040f..9e50eaf967f2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -76,9 +76,10 @@ struct enc_region { }; /* Called with the sev_bitmap_lock held, or on shutdown */ -static int sev_flush_asids(int min_asid, int max_asid) +static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { - int ret, asid, error = 0; + int ret, error = 0; + unsigned int asid; /* Check if there are any ASIDs to reclaim before performing a flush */ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); @@ -108,7 +109,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(int min_asid, int max_asid) +static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { if (sev_flush_asids(min_asid, max_asid)) return false; @@ -135,8 +136,9 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - int asid, min_asid, max_asid, ret; + unsigned int asid, min_asid, max_asid; bool retry = true; + int ret; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); @@ -179,7 +181,7 @@ e_uncharge: return ret; } -static int sev_get_asid(struct kvm *kvm) +static unsigned int sev_get_asid(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -276,8 +278,8 @@ e_no_asid: static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { + unsigned int asid = sev_get_asid(kvm); struct sev_data_activate activate; - int asid = sev_get_asid(kvm); int ret; /* activate ASID on the given handle */ @@ -2290,7 +2292,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) */ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { - int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + unsigned int asid = sev_get_asid(vcpu->kvm); /* * Note! The address must be a kernel address, as regular page walk @@ -2611,7 +2613,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - int asid = sev_get_asid(svm->vcpu.kvm); + unsigned int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ svm->asid = asid; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index bc25589ad588..6c1dcf44c4fa 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -729,13 +729,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, - TP_PROTO(__u64 rip, int asid, u64 address), + TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( - __field( __u64, rip ) - __field( int, asid ) - __field( __u64, address ) + __field( __u64, rip ) + __field( unsigned int, asid ) + __field( __u64, address ) ), TP_fast_assign( @@ -744,7 +744,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); From 4af6d5b4d9f1769b2d95d2a96cc270274b91337e Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Wed, 31 Jan 2024 15:56:08 -0800 Subject: [PATCH 087/553] KVM: SVM: Add support for allowing zero SEV ASIDs [ Upstream commit 0aa6b90ef9d75b4bd7b6d106d85f2a3437697f91 ] Some BIOSes allow the end user to set the minimum SEV ASID value (CPUID 0x8000001F_EDX) to be greater than the maximum number of encrypted guests, or maximum SEV ASID value (CPUID 0x8000001F_ECX) in order to dedicate all the SEV ASIDs to SEV-ES or SEV-SNP. The SEV support, as coded, does not handle the case where the minimum SEV ASID value can be greater than the maximum SEV ASID value. As a result, the following confusing message is issued: [ 30.715724] kvm_amd: SEV enabled (ASIDs 1007 - 1006) Fix the support to properly handle this case. Fixes: 916391a2d1dc ("KVM: SVM: Add support for SEV-ES capability in KVM") Suggested-by: Sean Christopherson Signed-off-by: Ashish Kalra Cc: stable@vger.kernel.org Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/20240104190520.62510-1-Ashish.Kalra@amd.com Link: https://lore.kernel.org/r/20240131235609.4161407-4-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/sev.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9e50eaf967f2..d8e192ad5953 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -136,10 +136,21 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - unsigned int asid, min_asid, max_asid; + /* + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. + * Note: min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + */ + unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; + unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + unsigned int asid; bool retry = true; int ret; + if (min_asid > max_asid) + return -ENOTTY; + WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); ret = sev_misc_cg_try_charge(sev); @@ -151,12 +162,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) mutex_lock(&sev_bitmap_lock); - /* - * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. - * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - */ - min_asid = sev->es_active ? 1 : min_sev_asid; - max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); if (asid > max_asid) { @@ -2215,8 +2220,10 @@ void __init sev_hardware_setup(void) goto out; } - sev_asid_count = max_sev_asid - min_sev_asid + 1; - WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + if (min_sev_asid <= max_sev_asid) { + sev_asid_count = max_sev_asid - min_sev_asid + 1; + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + } sev_supported = true; /* SEV-ES support requested? */ @@ -2247,7 +2254,9 @@ void __init sev_hardware_setup(void) out: if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", - sev_supported ? "enabled" : "disabled", + sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : + "unusable" : + "disabled", min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", From 90a477dfda3be83709e1f761710e09835f51b49f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 24 Nov 2023 16:08:22 +0100 Subject: [PATCH 088/553] fs/pipe: Fix lockdep false-positive in watchqueue pipe_write() [ Upstream commit 055ca83559912f2cfd91c9441427bac4caf3c74e ] When you try to splice between a normal pipe and a notification pipe, get_pipe_info(..., true) fails, so splice() falls back to treating the notification pipe like a normal pipe - so we end up in iter_file_splice_write(), which first locks the input pipe, then calls vfs_iter_write(), which locks the output pipe. Lockdep complains about that, because we're taking a pipe lock while already holding another pipe lock. I think this probably (?) can't actually lead to deadlocks, since you'd need another way to nest locking a normal pipe into locking a watch_queue pipe, but the lockdep annotations don't make that clear. Bail out earlier in pipe_write() for notification pipes, before taking the pipe lock. Reported-and-tested-by: Closes: https://syzkaller.appspot.com/bug?extid=011e4ea1da6692cf881c Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20231124150822.2121798-1-jannh@google.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/pipe.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 9873a6030df5..aa8e6ffe1cb5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -424,6 +424,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) bool was_empty = false; bool wake_next_writer = false; + /* + * Reject writing to watch queue pipes before the point where we lock + * the pipe. + * Otherwise, lockdep would be unhappy if the caller already has another + * pipe locked. + * If we had to support locking a normal pipe and a notification pipe at + * the same time, we could set up lockdep annotations for that, but + * since we don't actually need that, it's simpler to just bail here. + */ + if (pipe_has_watch_queue(pipe)) + return -EXDEV; + /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; @@ -436,11 +448,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } - if (pipe_has_watch_queue(pipe)) { - ret = -EXDEV; - goto out; - } - /* * If it wasn't empty we try to merge new data into * the last buffer. From d00c24ddec511f73bc5f91b86a114f36ab508076 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 9 Jan 2024 12:39:03 +0900 Subject: [PATCH 089/553] 9p: Fix read/write debug statements to report server reply [ Upstream commit be3193e58ec210b2a72fb1134c2a0695088a911d ] Previous conversion to iov missed these debug statements which would now always print the requested size instead of the actual server reply. Write also added a loop in a much older commit but we didn't report these, while reads do report each iteration -- it's more coherent to keep reporting all requests to server so move that at the same time. Fixes: 7f02464739da ("9p: convert to advancing variant of iov_iter_get_pages_alloc()") Signed-off-by: Dominique Martinet Message-ID: <20240109-9p-rw-trace-v1-1-327178114257@codewreck.org> Signed-off-by: Sasha Levin --- net/9p/client.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 84b93b04d0f0..1d9a8a1f3f10 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1581,7 +1581,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, received = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1607,9 +1607,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) int total = 0; *err = 0; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", - fid->fid, offset, iov_iter_count(from)); - while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; @@ -1621,6 +1618,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + fid->fid, offset, rsize, count); + /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, @@ -1648,7 +1648,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) written = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); From 81f7c9da2bed3df06fd4c6a1ae8034001b1ed959 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 12 Mar 2024 01:20:53 +0000 Subject: [PATCH 090/553] drivers/perf: riscv: Disable PERF_SAMPLE_BRANCH_* while not supported [ Upstream commit ea6873118493019474abbf57d5a800da365734df ] RISC-V perf driver does not yet support branch sampling. Although the specification is in the works [0], it is best to disable such events until support is available, otherwise we will get unexpected results. Due to this reason, two riscv bpf testcases get_branch_snapshot and perf_branches/perf_branches_hw fail. Link: https://github.com/riscv/riscv-control-transfer-records [0] Fixes: f5bfa23f576f ("RISC-V: Add a perf core library for pmu drivers") Signed-off-by: Pu Lehui Reviewed-by: Atish Patra Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240312012053.1178140-1-pulehui@huaweicloud.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- drivers/perf/riscv_pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 56897d4d4fd3..2d5cf135e8a1 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -246,6 +246,10 @@ static int riscv_pmu_event_init(struct perf_event *event) u64 event_config = 0; uint64_t cmask; + /* driver does not support branch stack sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + hwc->flags = 0; mapped_event = rvpmu->event_map(event, &event_config); if (mapped_event < 0) { From 392c47fea7aac184340684774d982607065fca82 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Fri, 22 Mar 2024 16:45:25 +0000 Subject: [PATCH 091/553] drm/panfrost: fix power transition timeout warnings [ Upstream commit 2bd02f5a0bac4bb13e0da18652dc75ba0e4958ec ] Increase the timeout value to prevent system logs on Amlogic boards flooding with power transition warnings: [ 13.047638] panfrost ffe40000.gpu: shader power transition timeout [ 13.048674] panfrost ffe40000.gpu: l2 power transition timeout [ 13.937324] panfrost ffe40000.gpu: shader power transition timeout [ 13.938351] panfrost ffe40000.gpu: l2 power transition timeout ... [39829.506904] panfrost ffe40000.gpu: shader power transition timeout [39829.507938] panfrost ffe40000.gpu: l2 power transition timeout [39949.508369] panfrost ffe40000.gpu: shader power transition timeout [39949.509405] panfrost ffe40000.gpu: l2 power transition timeout The 2000 value has been found through trial and error testing with devices using G52 and G31 GPUs. Fixes: 22aa1a209018 ("drm/panfrost: Really power off GPU cores in panfrost_gpu_power_off()") Signed-off-by: Christian Hewitt Reviewed-by: Steven Price Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240322164525.2617508-1-christianshewitt@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panfrost/panfrost_gpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 55d243048516..40b631445992 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -379,19 +379,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev) gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "shader power transition timeout"); gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, - val, !val, 1, 1000); + val, !val, 1, 2000); if (ret) dev_err(pfdev->dev, "tiler power transition timeout"); gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, - val, !val, 0, 1000); + val, !val, 0, 2000); if (ret) dev_err(pfdev->dev, "l2 power transition timeout"); } From eb028d1ebd0b410095b79308b82f01ca1598223d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 25 Mar 2024 17:18:12 -0500 Subject: [PATCH 092/553] ASoC: rt5682-sdw: fix locking sequence [ Upstream commit 310a5caa4e861616a27a83c3e8bda17d65026fa8 ] The disable_irq_lock protects the 'disable_irq' value, we need to lock before testing it. Fixes: 02fb23d72720 ("ASoC: rt5682-sdw: fix for JD event handling in ClockStop Mode0") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Chao Song Link: https://msgid.link/r/20240325221817.206465-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index 868a61c8b060..7685011a0935 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -787,12 +787,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt5682->disable_irq_lock); if (rt5682->disable_irq == true) { - mutex_lock(&rt5682->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt5682->disable_irq = false; - mutex_unlock(&rt5682->disable_irq_lock); } + mutex_unlock(&rt5682->disable_irq_lock); goto regmap_sync; } From 044c34fe3531a8061b7c23a40da82e3f2b28f80f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 25 Mar 2024 17:18:13 -0500 Subject: [PATCH 093/553] ASoC: rt711-sdca: fix locking sequence [ Upstream commit ee287771644394d071e6a331951ee8079b64f9a7 ] The disable_irq_lock protects the 'disable_irq' value, we need to lock before testing it. Fixes: 23adeb7056ac ("ASoC: rt711-sdca: fix for JD event handling in ClockStop Mode0") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Chao Song Link: https://msgid.link/r/20240325221817.206465-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 487d3010ddc1..931dbc68548e 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -443,13 +443,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } From 4ff3d8ac62348697accf6d3e661c2d28f929f44a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 25 Mar 2024 17:18:14 -0500 Subject: [PATCH 094/553] ASoC: rt711-sdw: fix locking sequence [ Upstream commit aae86cfd8790bcc7693a5a0894df58de5cb5128c ] The disable_irq_lock protects the 'disable_irq' value, we need to lock before testing it. Fixes: b69de265bd0e ("ASoC: rt711: fix for JD event handling in ClockStop Mode0") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Chao Song Link: https://msgid.link/r/20240325221817.206465-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 9545b8a7eb19..af7a0ab5669f 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -542,12 +542,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev) return 0; if (!slave->unattach_request) { + mutex_lock(&rt711->disable_irq_lock); if (rt711->disable_irq == true) { - mutex_lock(&rt711->disable_irq_lock); sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); rt711->disable_irq = false; - mutex_unlock(&rt711->disable_irq_lock); } + mutex_unlock(&rt711->disable_irq_lock); goto regmap_sync; } From 7ff957cea8af6af4d6f1079f10aafe721d71f144 Mon Sep 17 00:00:00 2001 From: Stephen Lee Date: Mon, 25 Mar 2024 18:01:31 -0700 Subject: [PATCH 095/553] ASoC: ops: Fix wraparound for mask in snd_soc_get_volsw [ Upstream commit fc563aa900659a850e2ada4af26b9d7a3de6c591 ] In snd_soc_info_volsw(), mask is generated by figuring out the index of the most significant bit set in max and converting the index to a bitmask through bit shift 1. Unintended wraparound occurs when max is an integer value with msb bit set. Since the bit shift value 1 is treated as an integer type, the left shift operation will wraparound and set mask to 0 instead of all 1's. In order to fix this, we type cast 1 as `1ULL` to prevent the wraparound. Fixes: 7077148fb50a ("ASoC: core: Split ops out of soc-core.c") Signed-off-by: Stephen Lee Link: https://msgid.link/r/20240326010131.6211-1-slee08177@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 2d25748ca706..b27e89ff6a16 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, int max = mc->max; int min = mc->min; int sign_bit = mc->sign_bit; - unsigned int mask = (1 << fls(max)) - 1; + unsigned int mask = (1ULL << fls(max)) - 1; unsigned int invert = mc->invert; int val; int ret; From 7171d6aef1f1d96213786443a16d3baca73c7c32 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 15:53:37 +0100 Subject: [PATCH 096/553] ata: sata_sx4: fix pdc20621_get_from_dimm() on 64-bit [ Upstream commit 52f80bb181a9a1530ade30bc18991900bbb9697f ] gcc warns about a memcpy() with overlapping pointers because of an incorrect size calculation: In file included from include/linux/string.h:369, from drivers/ata/sata_sx4.c:66: In function 'memcpy_fromio', inlined from 'pdc20621_get_from_dimm.constprop' at drivers/ata/sata_sx4.c:962:2: include/linux/fortify-string.h:97:33: error: '__builtin_memcpy' accessing 4294934464 bytes at offsets 0 and [16, 16400] overlaps 6442385281 bytes at offset -2147450817 [-Werror=restrict] 97 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:620:9: note: in expansion of macro '__underlying_memcpy' 620 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:665:26: note: in expansion of macro '__fortify_memcpy_chk' 665 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ include/asm-generic/io.h:1184:9: note: in expansion of macro 'memcpy' 1184 | memcpy(buffer, __io_virt(addr), size); | ^~~~~~ The problem here is the overflow of an unsigned 32-bit number to a negative that gets converted into a signed 'long', keeping a large positive number. Replace the complex calculation with a more readable min() variant that avoids the warning. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/sata_sx4.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 6ceec59cb291..fa1966638c06 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -958,8 +958,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_fromio(psource, dimm_mmio + offset / 4, dist); psource += dist; @@ -1006,8 +1005,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_DIMM_WINDOW_CTLR); offset -= (idx * window_size); idx++; - dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : - (long) (window_size - offset); + dist = min(size, window_size - offset); memcpy_toio(dimm_mmio + offset / 4, psource, dist); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); From 4b87c1bc25593b5915b398d3998b3e3aff4d7dc7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 23:38:06 +0100 Subject: [PATCH 097/553] scsi: mylex: Fix sysfs buffer lengths [ Upstream commit 1197c5b2099f716b3de327437fb50900a0b936c9 ] The myrb and myrs drivers use an odd way of implementing their sysfs files, calling snprintf() with a fixed length of 32 bytes to print into a page sized buffer. One of the strings is actually longer than 32 bytes, which clang can warn about: drivers/scsi/myrb.c:1906:10: error: 'snprintf' will always be truncated; specified size is 32, but format string expands to at least 34 [-Werror,-Wformat-truncation] drivers/scsi/myrs.c:1089:10: error: 'snprintf' will always be truncated; specified size is 32, but format string expands to at least 34 [-Werror,-Wformat-truncation] These could all be plain sprintf() without a length as the buffer is always long enough. On the other hand, sysfs files should not be overly long either, so just double the length to make sure the longest strings don't get truncated here. Fixes: 77266186397c ("scsi: myrs: Add Mylex RAID controller (SCSI interface)") Fixes: 081ff398c56c ("scsi: myrb: Add Mylex RAID controller (block interface)") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240326223825.4084412-8-arnd@kernel.org Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/myrb.c | 20 ++++++++++---------- drivers/scsi/myrs.c | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index e885c1dbf61f..e2f1b186efd0 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrb_devstate_name(ldev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); } else { struct myrb_pdev_state *pdev_info = sdev->hostdata; @@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, else name = myrb_devstate_name(pdev_info->state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->state); } return ret; @@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, name = myrb_raidlevel_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->state); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } - return snprintf(buf, 32, "Physical Drive\n"); + return snprintf(buf, 64, "Physical Drive\n"); } static DEVICE_ATTR_RO(raid_level); @@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < myrb_logical_channel(sdev->host)) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); status = myrb_get_rbld_progress(cb, &rbld_buf); if (rbld_buf.ldev_num != sdev->id || status != MYRB_STATUS_SUCCESS) - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); - return snprintf(buf, 32, "rebuilding block %u of %u\n", + return snprintf(buf, 64, "rebuilding block %u of %u\n", rbld_buf.ldev_size - rbld_buf.blocks_left, rbld_buf.ldev_size); } diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 7eb8c39da366..95e7c00cb7e5 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev, name = myrs_devstate_name(ldev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else { struct myrs_pdev_info *pdev_info; @@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev, pdev_info = sdev->hostdata; name = myrs_devstate_name(pdev_info->dev_state); if (name) - ret = snprintf(buf, 32, "%s\n", name); + ret = snprintf(buf, 64, "%s\n", name); else - ret = snprintf(buf, 32, "Invalid (%02X)\n", + ret = snprintf(buf, 64, "Invalid (%02X)\n", pdev_info->dev_state); } return ret; @@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev, ldev_info = sdev->hostdata; name = myrs_raid_level_name(ldev_info->raid_level); if (!name) - return snprintf(buf, 32, "Invalid (%02X)\n", + return snprintf(buf, 64, "Invalid (%02X)\n", ldev_info->dev_state); } else name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); - return snprintf(buf, 32, "%s\n", name); + return snprintf(buf, 64, "%s\n", name); } static DEVICE_ATTR_RO(raid_level); @@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev, unsigned char status; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not rebuilding\n"); + return snprintf(buf, 64, "physical device - not rebuilding\n"); ldev_info = sdev->hostdata; ldev_num = ldev_info->ldev_num; @@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev, return -EIO; } if (ldev_info->rbld_active) { - return snprintf(buf, 32, "rebuilding block %zu of %zu\n", + return snprintf(buf, 64, "rebuilding block %zu of %zu\n", (size_t)ldev_info->rbld_lba, (size_t)ldev_info->cfg_devsize); } else - return snprintf(buf, 32, "not rebuilding\n"); + return snprintf(buf, 64, "not rebuilding\n"); } static ssize_t rebuild_store(struct device *dev, @@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev, unsigned short ldev_num; if (sdev->channel < cs->ctlr_info->physchan_present) - return snprintf(buf, 32, "physical device - not checking\n"); + return snprintf(buf, 64, "physical device - not checking\n"); ldev_info = sdev->hostdata; if (!ldev_info) @@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev, ldev_num = ldev_info->ldev_num; myrs_get_ldev_info(cs, ldev_num, ldev_info); if (ldev_info->cc_active) - return snprintf(buf, 32, "checking block %zu of %zu\n", + return snprintf(buf, 64, "checking block %zu of %zu\n", (size_t)ldev_info->cc_lba, (size_t)ldev_info->cfg_devsize); else - return snprintf(buf, 32, "not checking\n"); + return snprintf(buf, 64, "not checking\n"); } static ssize_t consistency_check_store(struct device *dev, From 9adcfd56703cfcdbeb5f076c714594873f02ec72 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 8 Dec 2023 16:23:35 +0800 Subject: [PATCH 098/553] scsi: sd: Unregister device if device_add_disk() failed in sd_probe() [ Upstream commit 0296bea01cfa6526be6bd2d16dc83b4e7f1af91f ] "if device_add() succeeds, you should call device_del() when you want to get rid of it." In sd_probe(), device_add_disk() fails when device_add() has already succeeded, so change put_device() to device_unregister() to ensure device resources are released. Fixes: 2a7a891f4c40 ("scsi: sd: Add error handling support for add_disk()") Signed-off-by: Li Nan Link: https://lore.kernel.org/r/20231208082335.1754205-1-linan666@huaweicloud.com Reviewed-by: Bart Van Assche Reviewed-by: Yu Kuai Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c793bca88223..f32236c3f81c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3636,7 +3636,7 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { - put_device(&sdkp->disk_dev); + device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } From 78942ac754990d200ba5454678e3d8cb2e327d59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Apr 2024 10:11:35 +0100 Subject: [PATCH 099/553] cifs: Fix caching to try to do open O_WRONLY as rdwr on server [ Upstream commit e9e62243a3e2322cf639f653a0b0a88a76446ce7 ] When we're engaged in local caching of a cifs filesystem, we cannot perform caching of a partially written cache granule unless we can read the rest of the granule. This can result in unexpected access errors being reported to the user. Fix this by the following: if a file is opened O_WRONLY locally, but the mount was given the "-o fsc" flag, try first opening the remote file with GENERIC_READ|GENERIC_WRITE and if that returns -EACCES, try dropping the GENERIC_READ and doing the open again. If that last succeeds, invalidate the cache for that file as for O_DIRECT. Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/dir.c | 15 +++++++++++++ fs/smb/client/file.c | 48 ++++++++++++++++++++++++++++++++--------- fs/smb/client/fscache.h | 6 ++++++ 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index e382b794acbe..863c7bc3db86 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -180,6 +180,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; *oplock = 0; if (tcon->ses->server->oplocks) @@ -191,6 +192,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned return PTR_ERR(full_path); } + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & @@ -267,6 +272,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) desired_access |= GENERIC_WRITE; + if (rdwr_for_fscache == 1) + desired_access |= GENERIC_READ; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -295,6 +302,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -308,8 +316,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) { cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access &= ~GENERIC_READ; + rdwr_for_fscache = 2; + goto retry_open; + } goto out; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 0f3405e0f2e4..c240cea7ca34 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -77,12 +77,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) */ } -static inline int cifs_convert_flags(unsigned int flags) +static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) { if ((flags & O_ACCMODE) == O_RDONLY) return GENERIC_READ; else if ((flags & O_ACCMODE) == O_WRONLY) - return GENERIC_WRITE; + return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; else if ((flags & O_ACCMODE) == O_RDWR) { /* GENERIC_ALL is too much permission to request can cause unnecessary access denied on create */ @@ -219,11 +219,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ int create_options = CREATE_NOT_DIR; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; if (!server->ops->open) return -ENOSYS; - desired_access = cifs_convert_flags(f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); /********************************************************************* * open flag mapping table: @@ -260,6 +265,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ if (f_flags & O_DIRECT) create_options |= CREATE_NO_BUFFER; +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -271,8 +277,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ }; rc = server->ops->open(xid, &oparms, oplock, buf); - if (rc) + if (rc) { + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } return rc; + } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); /* TODO: Add support for calling posix query info but with passing in fid */ if (tcon->unix_ext) @@ -705,11 +719,11 @@ int cifs_open(struct inode *inode, struct file *file) use_cache: fscache_use_cookie(cifs_inode_cookie(file_inode(file)), file->f_mode & FMODE_WRITE); - if (file->f_flags & O_DIRECT && - (!((file->f_flags & O_ACCMODE) != O_RDONLY) || - file->f_flags & O_APPEND)) - cifs_invalidate_cache(file_inode(file), - FSCACHE_INVAL_DIO_WRITE); + if (!(file->f_flags & O_DIRECT)) + goto out; + if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) + goto out; + cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); out: free_dentry_path(page); @@ -774,6 +788,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; struct cifs_open_parms oparms; + int rdwr_for_fscache = 0; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -837,7 +852,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - desired_access = cifs_convert_flags(cfile->f_flags); + /* If we're caching, we need to be able to fill in around partial writes. */ + if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) + rdwr_for_fscache = 1; + + desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) @@ -849,6 +868,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); +retry_open: oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, @@ -874,6 +894,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) /* indicate that we need to relock the file */ oparms.reconnect = true; } + if (rc == -EACCES && rdwr_for_fscache == 1) { + desired_access = cifs_convert_flags(cfile->f_flags, 0); + rdwr_for_fscache = 2; + goto retry_open; + } if (rc) { mutex_unlock(&cfile->fh_mutex); @@ -882,6 +907,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) goto reopen_error_exit; } + if (rdwr_for_fscache == 2) + cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY reopen_success: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index 67b601041f0a..c691b98b442a 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -108,6 +108,11 @@ static inline void cifs_readpage_to_fscache(struct inode *inode, __cifs_readpage_to_fscache(inode, page); } +static inline bool cifs_fscache_enabled(struct inode *inode) +{ + return fscache_cookie_enabled(cifs_inode_cookie(inode)); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline void cifs_fscache_fill_coherency(struct inode *inode, @@ -123,6 +128,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} +static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } static inline int cifs_fscache_query_occupancy(struct inode *inode, pgoff_t first, unsigned int nr_pages, From 12059cf0487ff5299f03e3e3bc0f71213c99a3de Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:48 +0200 Subject: [PATCH 100/553] ata: sata_mv: Fix PCI device ID table declaration compilation warning [ Upstream commit 3137b83a90646917c90951d66489db466b4ae106 ] Building with W=1 shows a warning for an unused variable when CONFIG_PCI is diabled: drivers/ata/sata_mv.c:790:35: error: unused variable 'mv_pci_tbl' [-Werror,-Wunused-const-variable] static const struct pci_device_id mv_pci_tbl[] = { Move the table into the same block that containsn the pci_driver definition. Fixes: 7bb3c5290ca0 ("sata_mv: Remove PCI dependency") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/sata_mv.c | 63 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 17f9062b0eaa..9cf540017a5e 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { }, }; -static const struct pci_device_id mv_pci_tbl[] = { - { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, - { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, - { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, - /* RocketRAID 1720/174x have different identifiers */ - { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, - { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, - - { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, - { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, - { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, - { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, - - { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, - - /* Adaptec 1430SA */ - { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, - - /* Marvell 7042 support */ - { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, - - /* Highpoint RocketRAID PCIe series */ - { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, - { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, - - { } /* terminate list */ -}; - static const struct mv_hw_ops mv5xxx_ops = { .phy_errata = mv5_phy_errata, .enable_leds = mv5_enable_leds, @@ -4301,6 +4270,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, static int mv_pci_device_resume(struct pci_dev *pdev); #endif +static const struct pci_device_id mv_pci_tbl[] = { + { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, + { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, + { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, + /* RocketRAID 1720/174x have different identifiers */ + { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, + { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, + + { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, + { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, + { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, + { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, + + { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, + + /* Adaptec 1430SA */ + { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, + + /* Marvell 7042 support */ + { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, + + /* Highpoint RocketRAID PCIe series */ + { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, + { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, + + { } /* terminate list */ +}; static struct pci_driver mv_pci_driver = { .name = DRV_NAME, @@ -4313,6 +4312,7 @@ static struct pci_driver mv_pci_driver = { #endif }; +MODULE_DEVICE_TABLE(pci, mv_pci_tbl); /** * mv_print_info - Dump key info to kernel log for perusal. @@ -4485,7 +4485,6 @@ static void __exit mv_exit(void) MODULE_AUTHOR("Brett Russ"); MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, mv_pci_tbl); MODULE_VERSION(DRV_VERSION); MODULE_ALIAS("platform:" DRV_NAME); From c19715ec258d4be92e0995660d31ae2612439f0b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Apr 2024 13:56:18 -0400 Subject: [PATCH 101/553] nfsd: hold a lighter-weight client reference over CB_RECALL_ANY [ Upstream commit 10396f4df8b75ff6ab0aa2cd74296565466f2c8d ] Currently the CB_RECALL_ANY job takes a cl_rpc_users reference to the client. While a callback job is technically an RPC that counter is really more for client-driven RPCs, and this has the effect of preventing the client from being unhashed until the callback completes. If nfsd decides to send a CB_RECALL_ANY just as the client reboots, we can end up in a situation where the callback can't complete on the (now dead) callback channel, but the new client can't connect because the old client can't be unhashed. This usually manifests as a NFS4ERR_DELAY return on the CREATE_SESSION operation. The job is only holding a reference to the client so it can clear a flag after the RPC completes. Fix this by having CB_RECALL_ANY instead hold a reference to the cl_nfsdfs.cl_ref. Typically we only take that sort of reference when dealing with the nfsdfs info files, but it should work appropriately here to ensure that the nfs4_client doesn't disappear. Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition") Reported-by: Vladimir Benes Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4state.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e4522e86e984..8d15959004ad 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2889,12 +2889,9 @@ static void nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); + drop_client(clp); } static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { @@ -6231,7 +6228,7 @@ deleg_reaper(struct nfsd_net *nn) list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); + kref_get(&clp->cl_nfsdfs.cl_ref); set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); } From 7ef6a7f9b32fdfc8bec0a10e6d5ac5374d4f02e7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 5 Apr 2024 16:46:37 +0200 Subject: [PATCH 102/553] x86/retpoline: Add NOENDBR annotation to the SRSO dummy return thunk commit b377c66ae3509ccea596512d6afb4777711c4870 upstream. srso_alias_untrain_ret() is special code, even if it is a dummy which is called in the !SRSO case, so annotate it like its real counterpart, to address the following objtool splat: vmlinux.o: warning: objtool: .export_symbol+0x2b290: data relocation to !ENDBR: srso_alias_untrain_ret+0x0 Fixes: 4535e1a4174c ("x86/bugs: Fix the SRSO mitigation on Zen3/4") Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240405144637.17908-1-bp@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a96e816e5ccd..055955c9bfcb 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -261,6 +261,7 @@ SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR ret int3 SYM_CODE_END(__x86_return_thunk) From 2e5f8dc1dec824d9df6d5f5eb9f1b5a73b57574f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Mar 2024 21:58:26 +0900 Subject: [PATCH 103/553] ksmbd: don't send oplock break if rename fails commit c1832f67035dc04fb89e6b591b64e4d515843cda upstream. Don't send oplock break if rename fails. This patch fix smb2.oplock.batch20 test. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 39fc078284c8..c02b1772cb80 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5579,8 +5579,9 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; - smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); + if (!rc) + smb_break_all_levII_oplock(work, fp, 0); out: kfree(new_name); return rc; From 51a6c2af9d20203ddeeaf73314ba8854b38d01bd Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Mar 2024 21:59:10 +0900 Subject: [PATCH 104/553] ksmbd: validate payload size in ipc response commit a677ebd8ca2f2632ccdecbad7b87641274e15aac upstream. If installing malicious ksmbd-tools, ksmbd.mountd can return invalid ipc response to ksmbd kernel server. ksmbd should validate payload size of ipc response from ksmbd.mountd to avoid memory overrun or slab-out-of-bounds. This patch validate 3 ipc response that has payload. Cc: stable@vger.kernel.org Reported-by: Chao Ma Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/ksmbd_netlink.h | 3 ++- fs/smb/server/mgmt/share_config.c | 7 +++++- fs/smb/server/transport_ipc.c | 37 +++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 0ebf91ffa236..4464a62228cf 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -166,7 +166,8 @@ struct ksmbd_share_config_response { __u16 force_uid; __u16 force_gid; __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; - __u32 reserved[112]; /* Reserved room */ + __u32 reserved[111]; /* Reserved room */ + __u32 payload_sz; __u32 veto_list_sz; __s8 ____payload[]; }; diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 328a412259dc..a2f0a2edceb8 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->name = kstrdup(name, GFP_KERNEL); if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { - share->path = kstrdup(ksmbd_share_config_path(resp), + int path_len = PATH_MAX; + + if (resp->payload_sz) + path_len = resp->payload_sz - resp->veto_list_sz; + + share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); if (share->path) share->path_sz = strlen(share->path); diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index f29bb03f0dc4..8752ac82c557 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -65,6 +65,7 @@ struct ipc_msg_table_entry { struct hlist_node ipc_table_hlist; void *response; + unsigned int msg_sz; }; static struct delayed_work ipc_timer_work; @@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) } memcpy(entry->response, payload, sz); + entry->msg_sz = sz; wake_up_interruptible(&entry->wait); ret = 0; break; @@ -453,6 +455,34 @@ out: return ret; } +static int ipc_validate_msg(struct ipc_msg_table_entry *entry) +{ + unsigned int msg_sz = entry->msg_sz; + + if (entry->type == KSMBD_EVENT_RPC_REQUEST) { + struct ksmbd_rpc_command *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; + } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { + struct ksmbd_spnego_authen_response *resp = entry->response; + + msg_sz = sizeof(struct ksmbd_spnego_authen_response) + + resp->session_key_len + resp->spnego_blob_len; + } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { + struct ksmbd_share_config_response *resp = entry->response; + + if (resp->payload_sz) { + if (resp->payload_sz < resp->veto_list_sz) + return -EINVAL; + + msg_sz = sizeof(struct ksmbd_share_config_response) + + resp->payload_sz; + } + } + + return entry->msg_sz != msg_sz ? -EINVAL : 0; +} + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) { struct ipc_msg_table_entry entry; @@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle ret = wait_event_interruptible_timeout(entry.wait, entry.response != NULL, IPC_WAIT_TIMEOUT); + if (entry.response) { + ret = ipc_validate_msg(&entry); + if (ret) { + kvfree(entry.response); + entry.response = NULL; + } + } out: down_write(&ipc_msg_table_lock); hash_del(&entry.ipc_table_hlist); From 883e072e83f1f322ebfebbcede7655420c6cf0ab Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 2 Apr 2024 09:31:22 +0900 Subject: [PATCH 105/553] ksmbd: do not set SMB2_GLOBAL_CAP_ENCRYPTION for SMB 3.1.1 commit 5ed11af19e56f0434ce0959376d136005745a936 upstream. SMB2_GLOBAL_CAP_ENCRYPTION flag should be used only for 3.0 and 3.0.2 dialects. This flags set cause compatibility problems with other SMB clients. Reported-by: James Christopher Adduono Tested-by: James Christopher Adduono Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2ops.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index 27a9dce3e03a..8600f32c981a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || + (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; } @@ -275,11 +280,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_DIRECTORY_LEASING; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || - (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && - conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; From 2ff8f06550575e60b05ea650047ab9a9bfb32d66 Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Thu, 28 Mar 2024 11:27:57 +0100 Subject: [PATCH 106/553] ALSA: hda/realtek - Fix inactive headset mic jack commit daf6c4681a74034d5723e2fb761e0d7f3a1ca18f upstream. This patch adds the existing fixup to certain TF platforms implementing the ALC274 codec with a headset jack. It fixes/activates the inactive microphone of the headset. Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: Message-ID: <20240328102757.50310-1-wse@tuxedocomputers.com> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fb12034d464e..24ab799b7ef1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10121,6 +10121,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), From f35d7ede62d989b65d37a49ccbfc3b1b06182deb Mon Sep 17 00:00:00 2001 From: I Gede Agastya Darma Laksana Date: Tue, 2 Apr 2024 00:46:02 +0700 Subject: [PATCH 107/553] ALSA: hda/realtek: Update Panasonic CF-SZ6 quirk to support headset with microphone commit 1576f263ee2147dc395531476881058609ad3d38 upstream. This patch addresses an issue with the Panasonic CF-SZ6's existing quirk, specifically its headset microphone functionality. Previously, the quirk used ALC269_FIXUP_HEADSET_MODE, which does not support the CF-SZ6's design of a single 3.5mm jack for both mic and audio output effectively. The device uses pin 0x19 for the headset mic without jack detection. Following verification on the CF-SZ6 and discussions with the original patch author, i determined that the update to ALC269_FIXUP_ASPIRE_HEADSET_MIC is the appropriate solution. This change is custom-designed for the CF-SZ6's unique hardware setup, which includes a single 3.5mm jack for both mic and audio output, connecting the headset microphone to pin 0x19 without the use of jack detection. Fixes: 0fca97a29b83 ("ALSA: hda/realtek - Add Panasonic CF-SZ6 headset jack quirk") Signed-off-by: I Gede Agastya Darma Laksana Cc: Message-ID: <20240401174602.14133-1-gedeagas22@gmail.com> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 24ab799b7ef1..e8cf38dc8a5e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9905,7 +9905,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), + SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), From 9406d598a13ad4e0f13b63d3a2bdbaf30d73af44 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 16:21:25 +0100 Subject: [PATCH 108/553] driver core: Introduce device_link_wait_removal() commit 0462c56c290a99a7f03e817ae5b843116dfb575c upstream. The commit 80dd33cf72d1 ("drivers: base: Fix device link removal") introduces a workqueue to release the consumer and supplier devices used in the devlink. In the job queued, devices are release and in turn, when all the references to these devices are dropped, the release function of the device itself is called. Nothing is present to provide some synchronisation with this workqueue in order to ensure that all ongoing releasing operations are done and so, some other operations can be started safely. For instance, in the following sequence: 1) of_platform_depopulate() 2) of_overlay_remove() During the step 1, devices are released and related devlinks are removed (jobs pushed in the workqueue). During the step 2, OF nodes are destroyed but, without any synchronisation with devlink removal jobs, of_overlay_remove() can raise warnings related to missing of_node_put(): ERROR: memory leak, expected refcount 1 instead of 2 Indeed, the missing of_node_put() call is going to be done, too late, from the workqueue job execution. Introduce device_link_wait_removal() to offer a way to synchronize operations waiting for the end of devlink removals (i.e. end of workqueue jobs). Also, as a flushing operation is done on the workqueue, the workqueue used is moved from a system-wide workqueue to a local one. Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Tested-by: Luca Ceresoli Reviewed-by: Nuno Sa Reviewed-by: Saravana Kannan Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240325152140.198219-2-herve.codina@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 26 +++++++++++++++++++++++--- include/linux/device.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3078f44dc186..8d87808cdb8a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -56,6 +56,7 @@ static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; +static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. @@ -585,12 +586,26 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. + * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. + */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + static struct class devlink_class = { .name = "devlink", .owner = THIS_MODULE, @@ -4132,9 +4147,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: diff --git a/include/linux/device.h b/include/linux/device.h index 5520bb546a4a..f88b498ee9da 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1099,6 +1099,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); extern __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); From 7b6df050c45a1ea158fd50bc32a8e1447dd1e951 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 16:21:26 +0100 Subject: [PATCH 109/553] of: dynamic: Synchronize of_changeset_destroy() with the devlink removals commit 8917e7385346bd6584890ed362985c219fe6ae84 upstream. In the following sequence: 1) of_platform_depopulate() 2) of_overlay_remove() During the step 1, devices are destroyed and devlinks are removed. During the step 2, OF nodes are destroyed but __of_changeset_entry_destroy() can raise warnings related to missing of_node_put(): ERROR: memory leak, expected refcount 1 instead of 2 ... Indeed, during the devlink removals performed at step 1, the removal itself releasing the device (and the attached of_node) is done by a job queued in a workqueue and so, it is done asynchronously with respect to function calls. When the warning is present, of_node_put() will be called but wrongly too late from the workqueue job. In order to be sure that any ongoing devlink removals are done before the of_node destruction, synchronize the of_changeset_destroy() with the devlink removals. Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Reviewed-by: Saravana Kannan Tested-by: Luca Ceresoli Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240325152140.198219-3-herve.codina@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/dynamic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 9bb9fe0fad07..e2a9651014c6 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "OF: " fmt +#include #include #include #include @@ -679,6 +680,17 @@ void of_changeset_destroy(struct of_changeset *ocs) { struct of_changeset_entry *ce, *cen; + /* + * When a device is deleted, the device links to/from it are also queued + * for deletion. Until these device links are freed, the devices + * themselves aren't freed. If the device being deleted is due to an + * overlay change, this device might be holding a reference to a device + * node that will be freed. So, wait until all already pending device + * links are deleted before freeing a device node. This ensures we don't + * free any device node that has a non-zero reference count. + */ + device_link_wait_removal(); + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) __of_changeset_entry_destroy(ce); } From 97e93367e82752e475a33839a80b33bdbef1209f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 3 Apr 2024 23:21:30 +0200 Subject: [PATCH 110/553] x86/mm/pat: fix VM_PAT handling in COW mappings commit 04c35ab3bdae7fefbd7c7a7355f29fa03a035221 upstream. PAT handling won't do the right thing in COW mappings: the first PTE (or, in fact, all PTEs) can be replaced during write faults to point at anon folios. Reliably recovering the correct PFN and cachemode using follow_phys() from PTEs will not work in COW mappings. Using follow_phys(), we might just get the address+protection of the anon folio (which is very wrong), or fail on swap/nonswap entries, failing follow_phys() and triggering a WARN_ON_ONCE() in untrack_pfn() and track_pfn_copy(), not properly calling free_pfn_range(). In free_pfn_range(), we either wouldn't call memtype_free() or would call it with the wrong range, possibly leaking memory. To fix that, let's update follow_phys() to refuse returning anon folios, and fallback to using the stored PFN inside vma->vm_pgoff for COW mappings if we run into that. We will now properly handle untrack_pfn() with COW mappings, where we don't need the cachemode. We'll have to fail fork()->track_pfn_copy() if the first page was replaced by an anon folio, though: we'd have to store the cachemode in the VMA to make this work, likely growing the VMA size. For now, lets keep it simple and let track_pfn_copy() just fail in that case: it would have failed in the past with swap/nonswap entries already, and it would have done the wrong thing with anon folios. Simple reproducer to trigger the WARN_ON_ONCE() in untrack_pfn(): <--- C reproducer ---> #include #include #include #include int main(void) { struct io_uring_params p = {}; int ring_fd; size_t size; char *map; ring_fd = io_uring_setup(1, &p); if (ring_fd < 0) { perror("io_uring_setup"); return 1; } size = p.sq_off.array + p.sq_entries * sizeof(unsigned); /* Map the submission queue ring MAP_PRIVATE */ map = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, ring_fd, IORING_OFF_SQ_RING); if (map == MAP_FAILED) { perror("mmap"); return 1; } /* We have at least one page. Let's COW it. */ *map = 0; pause(); return 0; } <--- C reproducer ---> On a system with 16 GiB RAM and swap configured: # ./iouring & # memhog 16G # killall iouring [ 301.552930] ------------[ cut here ]------------ [ 301.553285] WARNING: CPU: 7 PID: 1402 at arch/x86/mm/pat/memtype.c:1060 untrack_pfn+0xf4/0x100 [ 301.553989] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_g [ 301.558232] CPU: 7 PID: 1402 Comm: iouring Not tainted 6.7.5-100.fc38.x86_64 #1 [ 301.558772] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebu4 [ 301.559569] RIP: 0010:untrack_pfn+0xf4/0x100 [ 301.559893] Code: 75 c4 eb cf 48 8b 43 10 8b a8 e8 00 00 00 3b 6b 28 74 b8 48 8b 7b 30 e8 ea 1a f7 000 [ 301.561189] RSP: 0018:ffffba2c0377fab8 EFLAGS: 00010282 [ 301.561590] RAX: 00000000ffffffea RBX: ffff9208c8ce9cc0 RCX: 000000010455e047 [ 301.562105] RDX: 07fffffff0eb1e0a RSI: 0000000000000000 RDI: ffff9208c391d200 [ 301.562628] RBP: 0000000000000000 R08: ffffba2c0377fab8 R09: 0000000000000000 [ 301.563145] R10: ffff9208d2292d50 R11: 0000000000000002 R12: 00007fea890e0000 [ 301.563669] R13: 0000000000000000 R14: ffffba2c0377fc08 R15: 0000000000000000 [ 301.564186] FS: 0000000000000000(0000) GS:ffff920c2fbc0000(0000) knlGS:0000000000000000 [ 301.564773] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 301.565197] CR2: 00007fea88ee8a20 CR3: 00000001033a8000 CR4: 0000000000750ef0 [ 301.565725] PKRU: 55555554 [ 301.565944] Call Trace: [ 301.566148] [ 301.566325] ? untrack_pfn+0xf4/0x100 [ 301.566618] ? __warn+0x81/0x130 [ 301.566876] ? untrack_pfn+0xf4/0x100 [ 301.567163] ? report_bug+0x171/0x1a0 [ 301.567466] ? handle_bug+0x3c/0x80 [ 301.567743] ? exc_invalid_op+0x17/0x70 [ 301.568038] ? asm_exc_invalid_op+0x1a/0x20 [ 301.568363] ? untrack_pfn+0xf4/0x100 [ 301.568660] ? untrack_pfn+0x65/0x100 [ 301.568947] unmap_single_vma+0xa6/0xe0 [ 301.569247] unmap_vmas+0xb5/0x190 [ 301.569532] exit_mmap+0xec/0x340 [ 301.569801] __mmput+0x3e/0x130 [ 301.570051] do_exit+0x305/0xaf0 ... Link: https://lkml.kernel.org/r/20240403212131.929421-3-david@redhat.com Signed-off-by: David Hildenbrand Reported-by: Wupeng Ma Closes: https://lkml.kernel.org/r/20240227122814.3781907-1-mawupeng1@huawei.com Fixes: b1a86e15dc03 ("x86, pat: remove the dependency on 'vm_pgoff' in track/untrack pfn vma routines") Fixes: 5899329b1910 ("x86: PAT: implement track/untrack of pfnmap regions for x86 - v3") Acked-by: Ingo Molnar Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pat/memtype.c | 49 ++++++++++++++++++++++++++++----------- mm/memory.c | 4 ++++ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 66a209f7eb86..d6fe9093ea91 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -997,6 +997,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -1007,20 +1039,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1095,7 +1120,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1103,11 +1127,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/mm/memory.c b/mm/memory.c index fb83cf56377a..301c74c44438 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5593,6 +5593,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; + /* Never return PFNs of anon folios in COW mappings. */ + if (vm_normal_folio(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; From 20a915154ccb88da08986ab6c9fc4c1cf6259de2 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 13 Mar 2024 14:48:27 +0100 Subject: [PATCH 111/553] x86/mce: Make sure to grab mce_sysfs_mutex in set_bank() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3ddf944b32f88741c303f0b21459dbb3872b8bc5 upstream. Modifying a MCA bank's MCA_CTL bits which control which error types to be reported is done over /sys/devices/system/machinecheck/ ├── machinecheck0 │   ├── bank0 │   ├── bank1 │   ├── bank10 │   ├── bank11 ... sysfs nodes by writing the new bit mask of events to enable. When the write is accepted, the kernel deletes all current timers and reinits all banks. Doing that in parallel can lead to initializing a timer which is already armed and in the timer wheel, i.e., in use already: ODEBUG: init active (active state 0) object: ffff888063a28000 object type: timer_list hint: mce_timer_fn+0x0/0x240 arch/x86/kernel/cpu/mce/core.c:2642 WARNING: CPU: 0 PID: 8120 at lib/debugobjects.c:514 debug_print_object+0x1a0/0x2a0 lib/debugobjects.c:514 Fix that by grabbing the sysfs mutex as the rest of the MCA sysfs code does. Reported by: Yue Sun Reported by: xingwei lee Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/CAEkJfYNiENwQY8yV1LYJ9LjJs%2Bx_-PqMv98gKig55=2vbzffRw@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mce/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index cad6ea1911e9..359218bc1b34 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2471,12 +2471,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } From 22943e4fe4b3a2dcbadc3d38d5bf840bbdbfe374 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 26 Mar 2024 17:07:35 +0100 Subject: [PATCH 112/553] x86/coco: Require seeding RNG with RDRAND on CoCo systems commit 99485c4c026f024e7cb82da84c7951dbe3deb584 upstream. There are few uses of CoCo that don't rely on working cryptography and hence a working RNG. Unfortunately, the CoCo threat model means that the VM host cannot be trusted and may actively work against guests to extract secrets or manipulate computation. Since a malicious host can modify or observe nearly all inputs to guests, the only remaining source of entropy for CoCo guests is RDRAND. If RDRAND is broken -- due to CPU hardware fault -- the RNG as a whole is meant to gracefully continue on gathering entropy from other sources, but since there aren't other sources on CoCo, this is catastrophic. This is mostly a concern at boot time when initially seeding the RNG, as after that the consequences of a broken RDRAND are much more theoretical. So, try at boot to seed the RNG using 256 bits of RDRAND output. If this fails, panic(). This will also trigger if the system is booted without RDRAND, as RDRAND is essential for a safe CoCo boot. Add this deliberately to be "just a CoCo x86 driver feature" and not part of the RNG itself. Many device drivers and platforms have some desire to contribute something to the RNG, and add_device_randomness() is specifically meant for this purpose. Any driver can call it with seed data of any quality, or even garbage quality, and it can only possibly make the quality of the RNG better or have no effect, but can never make it worse. Rather than trying to build something into the core of the RNG, consider the particular CoCo issue just a CoCo issue, and therefore separate it all out into driver (well, arch/platform) code. [ bp: Massage commit message. ] Signed-off-by: Jason A. Donenfeld Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Elena Reshetova Reviewed-by: Kirill A. Shutemov Reviewed-by: Theodore Ts'o Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240326160735.73531-1-Jason@zx2c4.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/core.c | 41 +++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/coco.h | 2 ++ arch/x86/kernel/setup.c | 2 ++ 3 files changed, 45 insertions(+) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 1d3ad275c366..801e943fd2b2 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,13 +3,17 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld . All Rights Reserved. * * Author: Tom Lendacky */ #include #include +#include +#include +#include #include #include @@ -128,3 +132,40 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. + */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 60bb26097da1..1f97d00ad858 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -23,6 +23,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else static inline u64 cc_mkenc(u64 val) { @@ -33,6 +34,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d1ffac9ad611..18a034613d94 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1132,6 +1133,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ sev_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); From 447d844a3e10e3751f9a2ba4965d6f48b909671c Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Tue, 26 Mar 2024 18:12:13 +0100 Subject: [PATCH 113/553] s390/entry: align system call table on 8 bytes commit 378ca2d2ad410a1cd5690d06b46c5e2297f4c8c0 upstream. Align system call table on 8 bytes. With sys_call_table entry size of 8 bytes that eliminates the possibility of a system call pointer crossing cache line boundary. Cc: stable@kernel.org Suggested-by: Ulrich Weigand Reviewed-by: Alexander Gordeev Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d2a1f2f4f5b8..c9799dec9279 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -699,6 +699,7 @@ ENDPROC(stack_overflow) .Lthis_cpu: .short 0 .Lstosm_tmp: .byte 0 .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame .globl sys_call_table sys_call_table: From c88f7a709512b7ef384ffae20c26743f4f33d904 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 11 Mar 2024 19:19:13 -0700 Subject: [PATCH 114/553] riscv: Fix spurious errors from __get/put_kernel_nofault commit d080a08b06b6266cc3e0e86c5acfd80db937cb6b upstream. These macros did not initialize __kr_err, so they could fail even if the access did not fault. Cc: stable@vger.kernel.org Fixes: d464118cdc41 ("riscv: implement __get_kernel_nofault and __put_user_nofault") Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240312022030.320789-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index ec0cab9fbddd..72ec1d9bd3f3 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -328,7 +328,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ From f6583444d7e78dae750798552b65a2519ff3ca84 Mon Sep 17 00:00:00 2001 From: Stefan O'Rear Date: Wed, 27 Mar 2024 02:12:58 -0400 Subject: [PATCH 115/553] riscv: process: Fix kernel gp leakage commit d14fa1fcf69db9d070e75f1c4425211fa619dfc8 upstream. childregs represents the registers which are active for the new thread in user context. For a kernel thread, childregs->gp is never used since the kernel gp is not touched by switch_to. For a user mode helper, the gp value can be observed in user space after execve or possibly by other means. [From the email thread] The /* Kernel thread */ comment is somewhat inaccurate in that it is also used for user_mode_helper threads, which exec a user process, e.g. /sbin/init or when /proc/sys/kernel/core_pattern is a pipe. Such threads do not have PF_KTHREAD set and are valid targets for ptrace etc. even before they exec. childregs is the *user* context during syscall execution and it is observable from userspace in at least five ways: 1. kernel_execve does not currently clear integer registers, so the starting register state for PID 1 and other user processes started by the kernel has sp = user stack, gp = kernel __global_pointer$, all other integer registers zeroed by the memset in the patch comment. This is a bug in its own right, but I'm unwilling to bet that it is the only way to exploit the issue addressed by this patch. 2. ptrace(PTRACE_GETREGSET): you can PTRACE_ATTACH to a user_mode_helper thread before it execs, but ptrace requires SIGSTOP to be delivered which can only happen at user/kernel boundaries. 3. /proc/*/task/*/syscall: this is perfectly happy to read pt_regs for user_mode_helpers before the exec completes, but gp is not one of the registers it returns. 4. PERF_SAMPLE_REGS_USER: LOCKDOWN_PERF normally prevents access to kernel addresses via PERF_SAMPLE_REGS_INTR, but due to this bug kernel addresses are also exposed via PERF_SAMPLE_REGS_USER which is permitted under LOCKDOWN_PERF. I have not attempted to write exploit code. 5. Much of the tracing infrastructure allows access to user registers. I have not attempted to determine which forms of tracing allow access to user registers without already allowing access to kernel registers. Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Cc: stable@vger.kernel.org Signed-off-by: Stefan O'Rear Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327061258.2370291-1-sorear@fastmail.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/process.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 8955f2432c2d..6906cc0e5787 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -25,8 +25,6 @@ #include #include -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include unsigned long __stack_chk_guard __read_mostly; @@ -170,7 +168,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; From 8c99dfb49bdc17edffc7ff3d46b400c8c291686c Mon Sep 17 00:00:00 2001 From: Ritvik Budhiraja Date: Tue, 2 Apr 2024 14:01:28 -0500 Subject: [PATCH 116/553] smb3: retrying on failed server close commit 173217bd73365867378b5e75a86f0049e1069ee8 upstream. In the current implementation, CIFS close sends a close to the server and does not check for the success of the server close. This patch adds functionality to check for server close return status and retries in case of an EBUSY or EAGAIN error. This can help avoid handle leaks Cc: stable@vger.kernel.org Signed-off-by: Ritvik Budhiraja Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 6 ++-- fs/smb/client/cifsfs.c | 11 +++++++ fs/smb/client/cifsglob.h | 7 +++-- fs/smb/client/file.c | 63 ++++++++++++++++++++++++++++++++++---- fs/smb/client/smb1ops.c | 4 +-- fs/smb/client/smb2ops.c | 9 +++--- fs/smb/client/smb2pdu.c | 2 +- 7 files changed, 85 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 86fe433b1d32..f4ad343b06c1 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -370,6 +370,7 @@ smb2_close_cached_fid(struct kref *ref) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); + int rc; spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->on_list) { @@ -383,9 +384,10 @@ smb2_close_cached_fid(struct kref *ref) cfid->dentry = NULL; if (cfid->is_open) { - SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, + rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc != -EBUSY && rc != -EAGAIN) + atomic_dec(&cfid->tcon->num_remote_opens); } free_cached_dir(cfid); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 7286a56aebfa..0a79771c8f33 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -154,6 +154,7 @@ struct workqueue_struct *decrypt_wq; struct workqueue_struct *fileinfo_put_wq; struct workqueue_struct *cifsoplockd_wq; struct workqueue_struct *deferredclose_wq; +struct workqueue_struct *serverclose_wq; __u32 cifs_lock_secret; /* @@ -1866,6 +1867,13 @@ init_cifs(void) goto out_destroy_cifsoplockd_wq; } + serverclose_wq = alloc_workqueue("serverclose", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!serverclose_wq) { + rc = -ENOMEM; + goto out_destroy_serverclose_wq; + } + rc = cifs_init_inodecache(); if (rc) goto out_destroy_deferredclose_wq; @@ -1940,6 +1948,8 @@ out_destroy_decrypt_wq: destroy_workqueue(decrypt_wq); out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); +out_destroy_serverclose_wq: + destroy_workqueue(serverclose_wq); out_clean_proc: cifs_proc_clean(); return rc; @@ -1969,6 +1979,7 @@ exit_cifs(void) destroy_workqueue(cifsoplockd_wq); destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); + destroy_workqueue(serverclose_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 58bb54994e22..5cabf144e485 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -389,10 +389,10 @@ struct smb_version_operations { /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ - void (*close)(const unsigned int, struct cifs_tcon *, + int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); @@ -1359,6 +1359,7 @@ struct cifsFileInfo { bool invalidHandle:1; /* file closed via session abend */ bool swapfile:1; bool oplock_break_cancelled:1; + bool offload:1; /* offload final part of _put to a wq */ unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; @@ -1367,6 +1368,7 @@ struct cifsFileInfo { struct cifs_search_info srch_inf; struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ + struct work_struct serverclose; /* work for serverclose */ struct delayed_work deferred; bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ char *symlink_target; @@ -2005,6 +2007,7 @@ extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; +extern struct workqueue_struct *serverclose_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index c240cea7ca34..d23dfc83de50 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -330,6 +330,7 @@ cifs_down_write(struct rw_semaphore *sem) } static void cifsFileInfo_put_work(struct work_struct *work); +void serverclose_work(struct work_struct *work); struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock, @@ -376,6 +377,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); INIT_WORK(&cfile->put, cifsFileInfo_put_work); + INIT_WORK(&cfile->serverclose, serverclose_work); INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); @@ -467,6 +469,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) cifsFileInfo_put_final(cifs_file); } +void serverclose_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, serverclose); + + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + + struct TCP_Server_Info *server = tcon->ses->server; + int rc = 0; + int retries = 0; + int MAX_RETRIES = 4; + + do { + if (server->ops->close_getattr) + rc = server->ops->close_getattr(0, tcon, cifs_file); + else if (server->ops->close) + rc = server->ops->close(0, tcon, &cifs_file->fid); + + if (rc == -EBUSY || rc == -EAGAIN) { + retries++; + msleep(250); + } + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) + ); + + if (retries == MAX_RETRIES) + pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); + + if (cifs_file->offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); +} + /** * cifsFileInfo_put - release a reference of file priv data * @@ -507,10 +543,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, struct cifs_fid fid = {}; struct cifs_pending_open open; bool oplock_break_cancelled; + bool serverclose_offloaded = false; spin_lock(&tcon->open_file_lock); spin_lock(&cifsi->open_file_lock); spin_lock(&cifs_file->file_info_lock); + + cifs_file->offload = offload; if (--cifs_file->count > 0) { spin_unlock(&cifs_file->file_info_lock); spin_unlock(&cifsi->open_file_lock); @@ -552,13 +591,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); if (server->ops->close_getattr) - server->ops->close_getattr(xid, tcon, cifs_file); + rc = server->ops->close_getattr(xid, tcon, cifs_file); else if (server->ops->close) - server->ops->close(xid, tcon, &cifs_file->fid); + rc = server->ops->close(xid, tcon, &cifs_file->fid); _free_xid(xid); + + if (rc == -EBUSY || rc == -EAGAIN) { + // Server close failed, hence offloading it as an async op + queue_work(serverclose_wq, &cifs_file->serverclose); + serverclose_offloaded = true; + } } if (oplock_break_cancelled) @@ -566,10 +612,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, cifs_del_pending_open(&open); - if (offload) - queue_work(fileinfo_put_wq, &cifs_file->put); - else - cifsFileInfo_put_final(cifs_file); + // if serverclose has been offloaded to wq (on failure), it will + // handle offloading put as well. If serverclose not offloaded, + // we need to handle offloading put here. + if (!serverclose_offloaded) { + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); + } } int cifs_open(struct inode *inode, struct file *file) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 7d1b3fc014d9..d4045925f857 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -750,11 +750,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } -static void +static int cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - CIFSSMBClose(xid, tcon, fid->netfid); + return CIFSSMBClose(xid, tcon, fid->netfid); } static int diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 34d1262004df..3c471dc90659 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1392,14 +1392,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) memcpy(cfile->fid.create_guid, fid->create_guid, 16); } -static void +static int smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } -static void +static int smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile) { @@ -1410,7 +1410,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, &file_inf); if (rc) - return; + return rc; inode = d_inode(cfile->dentry); @@ -1436,6 +1436,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, /* End of file and Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); + return rc; } static int diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4c1231496a72..cc425a616899 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3452,9 +3452,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, memcpy(&pbuf->network_open_info, &rsp->network_open_info, sizeof(pbuf->network_open_info)); + atomic_dec(&tcon->num_remote_opens); } - atomic_dec(&tcon->num_remote_opens); close_exit: SMB2_close_free(&rqst); free_rsp_buf(resp_buftype, rsp); From 229042314602db62559ecacba127067c22ee7b88 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:53 -0300 Subject: [PATCH 117/553] smb: client: fix potential UAF in cifs_debug_files_proc_show() commit ca545b7f0823f19db0f1148d59bc5e1a56634502 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 2 ++ fs/smb/client/cifsglob.h | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 5df8d9323337..fa45b3e7efe8 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -186,6 +186,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 5cabf144e485..e5a72f9c793e 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2178,4 +2178,14 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, return sg; } +static inline bool cifs_ses_exiting(struct cifs_ses *ses) +{ + bool ret; + + spin_lock(&ses->ses_lock); + ret = ses->ses_status == SES_EXITING; + spin_unlock(&ses->ses_lock); + return ret; +} + #endif /* _CIFS_GLOB_H */ From 8fefd166fcb368c5fcf48238e3f7c8af829e0a72 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:55 -0300 Subject: [PATCH 118/553] smb: client: fix potential UAF in cifs_stats_proc_write() commit d3da25c5ac84430f89875ca7485a3828150a7e0a upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index fa45b3e7efe8..6a9320a747e2 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -568,6 +568,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, } #endif /* CONFIG_CIFS_STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { atomic_set(&tcon->num_smbs_sent, 0); spin_lock(&tcon->stat_lock); From 16b7d785775eb03929766819415055e367398f49 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:56 -0300 Subject: [PATCH 119/553] smb: client: fix potential UAF in cifs_stats_proc_show() commit 0865ffefea197b437ba78b5dd8d8e256253efd65 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 6a9320a747e2..a2afdf9c5f80 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -648,6 +648,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) } #endif /* STATS2 */ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { i++; seq_printf(m, "\n%d) %s", i, tcon->tree_name); From 84488466b7a69570bdbf76dd9576847ab97d54e7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:59 -0300 Subject: [PATCH 120/553] smb: client: fix potential UAF in smb2_is_valid_oplock_break() commit 22863485a4626ec6ecf297f4cc0aef709bc862e4 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 15fa022e7999..4d2e8b390e10 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -697,6 +697,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); From c868cabdf6fdd61bea54532271f4708254e57fc5 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:33:58 -0300 Subject: [PATCH 121/553] smb: client: fix potential UAF in smb2_is_valid_lease_break() commit 705c76fbf726c7a2f6ff9143d4013b18daaaebf1 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 4d2e8b390e10..8c149cb531d3 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); cifs_stats_inc( From 494c91e1e9413b407d12166a61b84200d4d54fac Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:34:00 -0300 Subject: [PATCH 122/553] smb: client: fix potential UAF in is_valid_oplock_break() commit 69ccf040acddf33a3a85ec0f6b45ef84b0f7ec29 upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 41290c12d0bc..3826f7176608 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -476,6 +476,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; From f9414004798d9742c1af23a1d839fe6a9503751c Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:34:02 -0300 Subject: [PATCH 123/553] smb: client: fix potential UAF in smb2_is_network_name_deleted() commit 63981561ffd2d4987807df4126f96a11e18b0c1d upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3c471dc90659..2291081653a8 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2437,6 +2437,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); From 7e8360ac8774e19b0b25f44fff84a105bb2417e4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 2 Apr 2024 16:34:04 -0300 Subject: [PATCH 124/553] smb: client: fix potential UAF in cifs_signal_cifsd_for_reconnect() commit e0e50401cc3921c9eaf1b0e667db174519ea939f upstream. Skip sessions that are being teared down (status == SES_EXITING) to avoid UAF. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/connect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 49fdc6dfdcf8..8c2a784200ec 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -216,6 +216,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { spin_lock(&ses->chans[i].server->srv_lock); From 08ef93ebc73c12d8c6249b28bf729924e8ff9142 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 29 Mar 2024 13:08:53 +0100 Subject: [PATCH 125/553] selftests: mptcp: join: fix dev in check_endpoint commit 40061817d95bce6dd5634a61a65cd5922e6ccc92 upstream. There's a bug in pm_nl_check_endpoint(), 'dev' didn't be parsed correctly. If calling it in the 2nd test of endpoint_tests() too, it fails with an error like this: creation [FAIL] expected '10.0.2.2 id 2 subflow dev dev' \ found '10.0.2.2 id 2 subflow dev ns2eth2' The reason is '$2' should be set to 'dev', not '$1'. This patch fixes it. Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240329-upstream-net-20240329-fallback-mib-v1-2-324a8981da48@kernel.org Signed-off-by: Jakub Kicinski [ Conflicts in mptcp_join.sh: only the fix has been added, not the verification because this modified subtest is quite different in v6.1: to add this verification, we would need to change a bit the subtest: pm_nl_check_endpoint() takes an extra argument for the title, the next chk_subflow_nr() will no longer need the title, etc. Easier with only the fix without the extra test. ] Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a20dca9d26d6..0b433606a298 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -725,7 +725,7 @@ pm_nl_check_endpoint() [ -n "$_flags" ]; flags="flags $_flags" shift elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $1" + [ -n "$2" ]; dev="dev $2" shift elif [ $1 = "id" ]; then _id=$2 From d1fefedc1a0e31cc8001f7702a2c51d07343a499 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 29 Mar 2024 13:08:52 +0100 Subject: [PATCH 126/553] mptcp: don't account accept() of non-MPC client as fallback to TCP commit 7a1b3490f47e88ec4cbde65f1a77a0f4bc972282 upstream. Current MPTCP servers increment MPTcpExtMPCapableFallbackACK when they accept non-MPC connections. As reported by Christoph, this is "surprising" because the counter might become greater than MPTcpExtMPCapableSYNRX. MPTcpExtMPCapableFallbackACK counter's name suggests it should only be incremented when a connection was seen using MPTCP options, then a fallback to TCP has been done. Let's do that by incrementing it when the subflow context of an inbound MPC connection attempt is dropped. Also, update mptcp_connect.sh kselftest, to ensure that the above MIB does not increment in case a pure TCP client connects to a MPTCP server. Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/449 Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240329-upstream-net-20240329-fallback-mib-v1-1-324a8981da48@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 3 --- net/mptcp/subflow.c | 2 ++ tools/testing/selftests/net/mptcp/mptcp_connect.sh | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3bc21581486a..c652c8ca765c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3349,9 +3349,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - } else { - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } out: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 891c2f4fed08..f1d422396b28 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -816,6 +816,8 @@ dispose_child: return child; fallback: + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 18c9b00ca058..dacf4cf2246d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -439,6 +439,7 @@ do_transfer() local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_tcpfb_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -504,6 +505,7 @@ do_transfer() local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_tcpfb_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -548,6 +550,11 @@ do_transfer() fi fi + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + printf "[ FAIL ]\nunexpected fallback to TCP" + rets=1 + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then printf "[ OK ]" fi From a6dc534c073b529dfd39683f63dd87ab30261f4b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:11 -0700 Subject: [PATCH 127/553] selftests: mptcp: display simult in extra_msg commit 629b35a225b0d49fbcff3b5c22e3b983c7c7b36f upstream. Just like displaying "invert" after "Info: ", "simult" should be displayed too when rm_subflow_nr doesn't match the expect value in chk_rm_nr(): syn [ ok ] synack [ ok ] ack [ ok ] add [ ok ] echo [ ok ] rm [ ok ] rmsf [ ok ] 3 in [2:4] Info: invert simult syn [ ok ] synack [ ok ] ack [ ok ] add [ ok ] echo [ ok ] rm [ ok ] rmsf [ ok ] Info: invert Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-10-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0b433606a298..635a1624b47d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1771,7 +1771,10 @@ chk_rm_nr() # in case of simult flush, the subflow removal count on each side is # unreliable count=$((count + cnt)) - [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + if [ "$count" != "$rm_subflow_nr" ]; then + suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + extra_msg="$extra_msg simult" + fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then echo -n "[ ok ] $suffix" From 9c2b4b657739ecda38e3b383354a29566955ac48 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 26 Mar 2024 15:32:08 +0100 Subject: [PATCH 128/553] mm/secretmem: fix GUP-fast succeeding on secretmem folios commit 65291dcfcf8936e1b23cfd7718fdfde7cfaf7706 upstream. folio_is_secretmem() currently relies on secretmem folios being LRU folios, to save some cycles. However, folios might reside in a folio batch without the LRU flag set, or temporarily have their LRU flag cleared. Consequently, the LRU flag is unreliable for this purpose. In particular, this is the case when secretmem_fault() allocates a fresh page and calls filemap_add_folio()->folio_add_lru(). The folio might be added to the per-cpu folio batch and won't get the LRU flag set until the batch was drained using e.g., lru_add_drain(). Consequently, folio_is_secretmem() might not detect secretmem folios and GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel when we would later try reading/writing to the folio, because the folio has been unmapped from the directmap. Fix it by removing that unreliable check. Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: David Hildenbrand Reported-by: xingwei lee Reported-by: yue sun Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/ Debugged-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Mike Rapoport (IBM) Cc: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton Signed-off-by: David Hildenbrand Signed-off-by: Greg Kroah-Hartman --- include/linux/secretmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 988528b5da43..48ffe325184c 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -14,10 +14,10 @@ static inline bool page_is_secretmem(struct page *page) * Using page_mapping() is quite slow because of the actual call * instruction and repeated compound_head(page) inside the * page_mapping() function. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (PageCompound(page) || !PageLRU(page)) + if (PageCompound(page)) return false; mapping = (struct address_space *) From 5e6898b8544dbb52755cee40c0c360a4bfde69e6 Mon Sep 17 00:00:00 2001 From: "min15.li" Date: Fri, 26 May 2023 17:06:56 +0000 Subject: [PATCH 129/553] nvme: fix miss command type check commit 31a5978243d24d77be4bacca56c78a0fbc43b00d upstream. In the function nvme_passthru_end(), only the value of the command opcode is checked, without checking the command type (IO command or Admin command). When we send a Dataset Management command (The opcode of the Dataset Management command is the same as the Set Feature command), kernel thinks it is a set feature command, then sets the controller's keep alive interval, and calls nvme_keep_alive_work(). Signed-off-by: min15.li Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Fixes: b58da2d270db ("nvme: update keep alive interval when kato is modified") Signed-off-by: Tokunori Ikegami Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 4 +++- drivers/nvme/host/ioctl.c | 3 ++- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/target/passthru.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d7516e99275b..20160683e868 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1151,7 +1151,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { @@ -1167,6 +1167,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); } + if (ns) + return; switch (cmd->common.opcode) { case nvme_admin_set_features: diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 91e6d0347579..b3e322e4ade3 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -147,6 +147,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, bool vec) { + struct nvme_ns *ns = q->queuedata; struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; @@ -181,7 +182,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, blk_mq_free_request(req); if (effects) - nvme_passthru_end(ctrl, effects, cmd, ret); + nvme_passthru_end(ctrl, ns, effects, cmd, ret); return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a892d679e338..8e28d2de45c0 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1063,7 +1063,7 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); int nvme_execute_passthru_rq(struct request *rq, u32 *effects); -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index adc0958755d6..a0a292d49588 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -216,6 +216,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + struct nvme_ns *ns = rq->q->queuedata; u32 effects; int status; @@ -242,7 +243,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) blk_mq_free_request(rq); if (effects) - nvme_passthru_end(ctrl, effects, req->cmd, status); + nvme_passthru_end(ctrl, ns, effects, req->cmd, status); } static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq, From fd52c0397b53ebcd4931981b3bc38f3b760b74df Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Apr 2024 11:14:13 -0700 Subject: [PATCH 130/553] x86/bugs: Change commas to semicolons in 'spectre_v2' sysfs file commit 0cd01ac5dcb1e18eb18df0f0d05b5de76522a437 upstream. Change the format of the 'spectre_v2' vulnerabilities sysfs file slightly by converting the commas to semicolons, so that mitigations for future variants can be grouped together and separated by commas. Signed-off-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e3fec47a800b..4a1b4dee1425 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2674,15 +2674,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2691,10 +2691,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2704,11 +2704,11 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } @@ -2727,9 +2727,9 @@ static ssize_t spectre_v2_show_state(char *buf) return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), spectre_v2_module_string()); } From 74fcb181772e5b8a8f1244c7393c56ae6d03c330 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Apr 2024 16:36:44 -0700 Subject: [PATCH 131/553] x86/syscall: Don't force use of indirect calls for system calls commit 1e3ad78334a69b36e107232e337f9d693dcc9df2 upstream. Make build a switch statement instead, and the compiler can either decide to generate an indirect jump, or - more likely these days due to mitigations - just a series of conditional branches. Yes, the conditional branches also have branch prediction, but the branch prediction is much more controlled, in that it just causes speculatively running the wrong system call (harmless), rather than speculatively running possibly wrong random less controlled code gadgets. This doesn't mitigate other indirect calls, but the system call indirection is the first and most easily triggered case. Signed-off-by: Linus Torvalds Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 6 +++--- arch/x86/entry/syscall_32.c | 21 +++++++++++++++++++-- arch/x86/entry/syscall_64.c | 19 +++++++++++++++++-- arch/x86/entry/syscall_x32.c | 10 +++++++--- arch/x86/include/asm/syscall.h | 10 ++++------ 5 files changed, 50 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 9c0b26ae5106..a60d19228890 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -114,7 +114,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f..c2235bae17ef 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9..33b3f09e6f15 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265..03de4a932131 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 5b85987a5e97..8f0da479c77d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include /* for TS_COMPAT */ #include +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. From 07dbb10f153f483e8249acebdffedf922e2ec2e1 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:56:58 -0700 Subject: [PATCH 132/553] x86/bhi: Add support for clearing branch history at syscall entry commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 upstream. Branch History Injection (BHI) attacks may allow a malicious application to influence indirect branch prediction in kernel by poisoning the branch history. eIBRS isolates indirect branch targets in ring0. The BHB can still influence the choice of indirect branch predictor entry, and although branch predictor entries are isolated between modes when eIBRS is enabled, the BHB itself is not isolated between modes. Alder Lake and new processors supports a hardware control BHI_DIS_S to mitigate BHI. For older processors Intel has released a software sequence to clear the branch history on parts that don't support BHI_DIS_S. Add support to execute the software sequence at syscall entry and VMexit to overwrite the branch history. For now, branch history is not cleared at interrupt entry, as malicious applications are not believed to have sufficient control over the registers, since previous register state is cleared at interrupt entry. Researchers continue to poke at this area and it may become necessary to clear at interrupt entry as well in the future. This mitigation is only defined here. It is enabled later. Signed-off-by: Pawan Gupta Co-developed-by: Daniel Sneddon Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 4 +- arch/x86/entry/entry_64.S | 61 ++++++++++++++++++++++++++++ arch/x86/entry/entry_64_compat.S | 16 ++++++++ arch/x86/include/asm/cpufeatures.h | 3 +- arch/x86/include/asm/nospec-branch.h | 12 ++++++ arch/x86/include/asm/syscall.h | 1 + arch/x86/kvm/vmx/vmenter.S | 2 + 7 files changed, 96 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a60d19228890..e72dac092245 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c2383c2880ec..6624806e6904 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1539,3 +1540,63 @@ SYM_CODE_START(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4bcd009a232b..b14b8cd85eb2 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -210,6 +211,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -278,3 +280,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1280daa72975..d1e179653821 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -434,11 +434,12 @@ /* * Extended auxiliary flags: Linux defined - for features scattered in various - * CPUID levels like 0x80000022, etc. + * CPUID levels like 0x80000022, etc and Linux defined features. * * Reuse free bits when adding new feature flags! */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 47e4e06a47d7..33e365cbe15b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -215,6 +215,14 @@ .Lskip_verw_\@: .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm +#else +#define CLEAR_BRANCH_HISTORY +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -243,6 +251,10 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8f0da479c77d..2725a4502321 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -127,6 +127,7 @@ static inline int syscall_get_arch(struct task_struct *task) void do_syscall_64(struct pt_regs *regs, int nr); void do_int80_syscall_32(struct pt_regs *regs); long do_fast_syscall_32(struct pt_regs *regs); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b2cad66dee1..de4490e331fa 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -242,6 +242,8 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX From 29c50bb6fbe4598d313ddb7ddb183e8b3d7bdf80 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 13 Mar 2024 09:47:57 -0700 Subject: [PATCH 133/553] x86/bhi: Define SPEC_CTRL_BHI_DIS_S commit 0f4a837615ff925ba62648d280a861adf1582df7 upstream. Newer processors supports a hardware control BHI_DIS_S to mitigate Branch History Injection (BHI). Setting BHI_DIS_S protects the kernel from userspace BHI attacks without having to manually overwrite the branch history. Define MSR_SPEC_CTRL bit BHI_DIS_S and its enumeration CPUID.BHI_CTRL. Mitigation is enabled later. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 5 ++++- arch/x86/kernel/cpu/scattered.c | 1 + arch/x86/kvm/reverse_cpuid.h | 3 ++- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d1e179653821..20ba328d2527 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -440,6 +440,7 @@ */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 33a19ef23644..9b3d17d90bde 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -55,10 +55,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 94e0a42528dc..28c357cf7c75 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index d2f6703a2633..e43909d6504a 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -42,7 +42,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) struct cpuid_reg { @@ -106,6 +106,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(SGX1); KVM_X86_TRANSLATE_FEATURE(SGX2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } From 42196bdec0824900b02bc21e02e9bb139197ca14 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:03 -0700 Subject: [PATCH 134/553] x86/bhi: Enumerate Branch History Injection (BHI) bug commit be482ff9500999f56093738f9219bbabc729d163 upstream. Mitigation for BHI is selected based on the bug enumeration. Add bits needed to enumerate BHI bug. Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kernel/cpu/common.c | 24 ++++++++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 20ba328d2527..0f1e0c73c474 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -489,4 +489,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9b3d17d90bde..681e8401b8a3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -160,6 +160,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ca243d7ba0ea..08fe77d2a3f9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1144,6 +1144,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1206,18 +1207,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1454,6 +1455,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(ia32_cap)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(ia32_cap & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From bb8384b6dfbc49be230071d1e844a8741982b1ec Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:05 -0700 Subject: [PATCH 135/553] x86/bhi: Add BHI mitigation knob commit ec9404e40e8f36421a2b66ecb76dc2209fe7f3ef upstream. Branch history clearing software sequences and hardware control BHI_DIS_S were defined to mitigate Branch History Injection (BHI). Add cmdline spectre_bhi={on|off|auto} to control BHI mitigation: auto - Deploy the hardware mitigation BHI_DIS_S, if available. on - Deploy the hardware mitigation BHI_DIS_S, if available, otherwise deploy the software sequence at syscall entry and VMexit. off - Turn off BHI mitigation. The default is auto mode which does not deploy the software sequence mitigation. This is because of the hardening done in the syscall dispatch path, which is the likely target of BHI. Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 45 ++++++++-- .../admin-guide/kernel-parameters.txt | 11 +++ arch/x86/Kconfig | 25 ++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 90 ++++++++++++++++++- 5 files changed, 165 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 32a8893e5617..62c7902c66fd 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, the BHB might be shared across privilege levels even in the presence of Enhanced IBRS. -Currently the only known real-world BHB attack vector is via -unprivileged eBPF. Therefore, it's highly recommended to not enable -unprivileged eBPF, especially when eIBRS is used (without retpolines). -For a full mitigation against BHB attacks, it's recommended to use -retpolines (or eIBRS combined with retpolines). +Previously the only known real-world BHB attack vector was via unprivileged +eBPF. Further research has found attacks that don't require unprivileged eBPF. +For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or +use the BHB clearing sequence. Attack scenarios ---------------- @@ -430,6 +429,21 @@ The possible values in this file are: 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB =========================== ======================================================= + - Branch History Injection (BHI) protection status: + +.. list-table:: + + * - BHI: Not affected + - System is not affected + * - BHI: Retpoline + - System is protected by retpoline + * - BHI: BHI_DIS_S + - System is protected by BHI_DIS_S + * - BHI: SW loop + - System is protected by software clearing sequence + * - BHI: Syscall hardening + - Syscalls are hardened against BHI + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -484,7 +498,11 @@ Spectre variant 2 Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at boot, by setting the IBRS bit, and they're automatically protected against - Spectre v2 variant attacks. + some Spectre v2 variant attacks. The BHB can still influence the choice of + indirect branch predictor entry, and although branch predictor entries are + isolated between modes when eIBRS is enabled, the BHB itself is not isolated + between modes. Systems which support BHI_DIS_S will set it to protect against + BHI attacks. On Intel's enhanced IBRS systems, this includes cross-thread branch target injections on SMT systems (STIBP). In other words, Intel eIBRS enables @@ -638,6 +656,21 @@ kernel command line. spectre_v2=off. Spectre variant 1 mitigations cannot be disabled. + spectre_bhi= + + [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. Syscalls are hardened against BHI + regardless of this setting. This setting affects the deployment + of the HW BHI control and the SW BHB clearing sequence. + + on + unconditionally enable. + off + unconditionally disable. + auto + enable if hardware mitigation + control(BHI_DIS_S) is available. + For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt Mitigation selection guide diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 88dffaf8f0a9..13866591af4d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5733,6 +5733,17 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/admin-guide/laptops/sonypi.rst + spectre_bhi= [X86] Control mitigation of Branch History Injection + (BHI) vulnerability. Syscalls are hardened against BHI + reglardless of this setting. This setting affects the + deployment of the HW BHI control and the SW BHB + clearing sequence. + + on - unconditionally enable. + off - unconditionally disable. + auto - (default) enable only if hardware mitigation + control(BHI_DIS_S) is available. + spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. The default operation protects the kernel from diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bea53385d31e..35ba10c28ca3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2563,6 +2563,31 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also +choice + prompt "Clear branch history" + depends on CPU_SUP_INTEL + default SPECTRE_BHI_AUTO + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See + +config SPECTRE_BHI_ON + bool "on" + help + Equivalent to setting spectre_bhi=on command line parameter. +config SPECTRE_BHI_OFF + bool "off" + help + Equivalent to setting spectre_bhi=off command line parameter. +config SPECTRE_BHI_AUTO + bool "auto" + help + Equivalent to setting spectre_bhi=auto command line parameter. + +endchoice + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0f1e0c73c474..a00b541644c7 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -441,6 +441,7 @@ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ /* * BUG word(s) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4a1b4dee1425..cfd73f5921fe 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1584,6 +1584,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, + BHI_MITIGATION_AUTO, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : + IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : + BHI_MITIGATION_AUTO; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else if (!strcmp(str, "auto")) + bhi_mitigation = BHI_MITIGATION_AUTO; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return; + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (bhi_mitigation == BHI_MITIGATION_AUTO) + return; + + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1694,6 +1762,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -2712,6 +2783,21 @@ static char *pbrsb_eibrs_state(void) } } +static const char * const spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return "; BHI: Retpoline"; + + return "; BHI: Vulnerable (Syscall hardening enabled)"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2724,13 +2810,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } From 43704e993ae54b8caf821501229dd2534ecb0e56 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 11 Mar 2024 08:57:09 -0700 Subject: [PATCH 136/553] x86/bhi: Mitigate KVM by default commit 95a6ccbdc7199a14b71ad8901cb788ba7fb5167b upstream. BHI mitigation mode spectre_bhi=auto does not deploy the software mitigation by default. In a cloud environment, it is a likely scenario where userspace is trusted but the guests are not trusted. Deploying system wide mitigation in such cases is not desirable. Update the auto mode to unconditionally mitigate against malicious guests. Deploy the software sequence at VMexit in auto mode also, when hardware mitigation is not available. Unlike the force =on mode, software sequence is not deployed at syscalls in auto mode. Suggested-by: Alexandre Chartre Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 7 +++++-- Documentation/admin-guide/kernel-parameters.txt | 5 +++-- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 5 +++++ arch/x86/kernel/cpu/bugs.c | 9 ++++++++- arch/x86/kvm/vmx/vmenter.S | 2 +- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 62c7902c66fd..9edb2860a3e1 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -439,10 +439,12 @@ The possible values in this file are: - System is protected by retpoline * - BHI: BHI_DIS_S - System is protected by BHI_DIS_S - * - BHI: SW loop + * - BHI: SW loop; KVM SW loop - System is protected by software clearing sequence * - BHI: Syscall hardening - Syscalls are hardened against BHI + * - BHI: Syscall hardening; KVM: SW loop + - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will @@ -669,7 +671,8 @@ kernel command line. unconditionally disable. auto enable if hardware mitigation - control(BHI_DIS_S) is available. + control(BHI_DIS_S) is available, otherwise + enable alternate mitigation in KVM. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 13866591af4d..b2c7b2f012e9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5741,8 +5741,9 @@ on - unconditionally enable. off - unconditionally disable. - auto - (default) enable only if hardware mitigation - control(BHI_DIS_S) is available. + auto - (default) enable hardware mitigation + (BHI_DIS_S) if available, otherwise enable + alternate mitigation in KVM. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a00b541644c7..7ded92672414 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -442,6 +442,7 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 33e365cbe15b..1e481d308e18 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,8 +219,13 @@ .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP .endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm #else #define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT #endif #else /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cfd73f5921fe..96bd3ee83a48 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1645,9 +1645,14 @@ static void __init bhi_select_mitigation(void) if (!IS_ENABLED(CONFIG_X86_64)) return; + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + if (bhi_mitigation == BHI_MITIGATION_AUTO) return; + /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); } @@ -2790,10 +2795,12 @@ static const char * const spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) - return "; BHI: SW loop"; + return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Syscall hardening, KVM: SW loop"; return "; BHI: Vulnerable (Syscall hardening enabled)"; } diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index de4490e331fa..b4f8937226c2 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -242,7 +242,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host - CLEAR_BRANCH_HISTORY + CLEAR_BRANCH_HISTORY_VMEXIT /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX From 3e4283b77107d1105a378859eb196e3ba5661270 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Wed, 13 Mar 2024 09:49:17 -0700 Subject: [PATCH 137/553] KVM: x86: Add BHI_NO Intel processors that aren't vulnerable to BHI will set commit ed2e8d49b54d677f3123668a21a57822d679651f upstream. MSR_IA32_ARCH_CAPABILITIES[BHI_NO] = 1;. Guests may use this BHI_NO bit to determine if they need to implement BHI mitigations or not. Allow this bit to be passed to the guests. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Daniel Sneddon Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Reviewed-by: Josh Poimboeuf Signed-off-by: Daniel Sneddon Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0e6e63a8f094..f724765032bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1614,7 +1614,7 @@ static unsigned int num_msr_based_features; ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { From e21838dfd0844b093a92d4cdd4db836b473c912d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 9 Apr 2024 19:32:41 +0200 Subject: [PATCH 138/553] x86: set SPECTRE_BHI_ON as default commit 2bb69f5fc72183e1c62547d900f560d0e9334925 upstream. Part of a merge commit from Linus that adjusted the default setting of SPECTRE_BHI_ON. Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 35ba10c28ca3..ba815ac474a1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2566,7 +2566,7 @@ config MITIGATION_RFDS choice prompt "Clear branch history" depends on CPU_SUP_INTEL - default SPECTRE_BHI_AUTO + default SPECTRE_BHI_ON help Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks where the branch history buffer is poisoned to speculatively steer From bf1e3b1cb1e002ed1590c91f1a24433b59322368 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Apr 2024 16:28:36 +0200 Subject: [PATCH 139/553] Linux 6.1.85 Link: https://lore.kernel.org/r/20240408125256.218368873@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Kelsey Steele Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Conor Dooley Tested-by: Mark Brown Tested-by: Sven Joachim Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20240409172805.638917723@linuxfoundation.org Tested-by: kernelci.org bot Link: https://lore.kernel.org/r/20240409173524.517362803@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e33150db2bf..5dff9ff99998 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 84 +SUBLEVEL = 85 EXTRAVERSION = NAME = Curry Ramen From e6768c6737f4c02cba193a3339f0cc2907f0b86a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 12 Apr 2024 06:11:25 +1000 Subject: [PATCH 140/553] amdkfd: use calloc instead of kzalloc to avoid integer overflow commit 3b0daecfeac0103aba8b293df07a0cbaf8b43f29 upstream. This uses calloc instead of doing the multiplication which might overflow. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e191d38f3da6..3f403afd6de8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -765,8 +765,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, * nodes, but not more than args->num_of_nodes as that is * the amount of memory allocated by user */ - pa = kzalloc((sizeof(struct kfd_process_device_apertures) * - args->num_of_nodes), GFP_KERNEL); + pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures), + GFP_KERNEL); if (!pa) return -ENOMEM; From 7dd4831c3ecb5e8f7f26fe69edd56ed791e06243 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Sun, 17 Dec 2023 13:29:03 +0200 Subject: [PATCH 141/553] wifi: ath9k: fix LNA selection in ath_ant_try_scan() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d6b27eb997ef9a2aa51633b3111bc4a04748e6d3 ] In 'ath_ant_try_scan()', (most likely) the 2nd LNA's signal strength should be used in comparison against RSSI when selecting first LNA as the main one. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://msgid.link/20231211172502.25202-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/antenna.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/antenna.c b/drivers/net/wireless/ath/ath9k/antenna.c index 988222cea9df..acc84e6711b0 100644 --- a/drivers/net/wireless/ath/ath9k/antenna.c +++ b/drivers/net/wireless/ath/ath9k/antenna.c @@ -643,7 +643,7 @@ static void ath_ant_try_scan(struct ath_ant_comb *antcomb, conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1; conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_PLUS_LNA2; } else if (antcomb->rssi_sub > - antcomb->rssi_lna1) { + antcomb->rssi_lna2) { /* set to A-B */ conf->main_lna_conf = ATH_ANT_DIV_COMB_LNA1; conf->alt_lna_conf = ATH_ANT_DIV_COMB_LNA1_MINUS_LNA2; From d6b0472d5fbda3b1b28d325a21aee1f435063270 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 25 Jan 2024 20:10:48 -0800 Subject: [PATCH 142/553] bnx2x: Fix firmware version string character counts [ Upstream commit 5642c82b9463c3263c086efb002516244bd4c668 ] A potential string truncation was reported in bnx2x_fill_fw_str(), when a long bp->fw_ver and a long phy_fw_ver might coexist, but seems unlikely with real-world hardware. Use scnprintf() to indicate the intent that truncations are tolerated. While reading this code, I found a collection of various buffer size counting issues. None looked like they might lead to a buffer overflow with current code (the small buffers are 20 bytes and might only ever consume 10 bytes twice with a trailing %NUL). However, early truncation (due to a %NUL in the middle of the string) might be happening under likely rare conditions. Regardless fix the formatters and related functions: - Switch from a separate strscpy() to just adding an additional "%s" to the format string that immediately follows it in bnx2x_fill_fw_str(). - Use sizeof() universally instead of using unbound defines. - Fix bnx2x_7101_format_ver() and bnx2x_null_format_ver() to report the number of characters written, not including the trailing %NUL (as already done with the other firmware formatting functions). - Require space for at least 1 byte in bnx2x_get_ext_phy_fw_version() for the trailing %NUL. - Correct the needed buffer size in bnx2x_3_seq_format_ver(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401260858.jZN6vD1k-lkp@intel.com/ Cc: Ariel Elior Cc: Sudarsana Kalluru Cc: Manish Chopra Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240126041044.work.220-kees@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 9 +++++---- .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 14 +++++++------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4950fde82d17..b04c5b51eb59 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -147,10 +147,11 @@ void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len) phy_fw_ver[0] = '\0'; bnx2x_get_ext_phy_fw_version(&bp->link_params, - phy_fw_ver, PHY_FW_VER_LEN); - strscpy(buf, bp->fw_ver, buf_len); - snprintf(buf + strlen(bp->fw_ver), 32 - strlen(bp->fw_ver), - "bc %d.%d.%d%s%s", + phy_fw_ver, sizeof(phy_fw_ver)); + /* This may become truncated. */ + scnprintf(buf, buf_len, + "%sbc %d.%d.%d%s%s", + bp->fw_ver, (bp->common.bc_ver & 0xff0000) >> 16, (bp->common.bc_ver & 0xff00) >> 8, (bp->common.bc_ver & 0xff), diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index bda3ccc28eca..f920976c36f0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1132,7 +1132,7 @@ static void bnx2x_get_drvinfo(struct net_device *dev, } memset(version, 0, sizeof(version)); - bnx2x_fill_fw_str(bp, version, ETHTOOL_FWVERS_LEN); + bnx2x_fill_fw_str(bp, version, sizeof(version)); strlcat(info->fw_version, version, sizeof(info->fw_version)); strscpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info)); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 02808513ffe4..ea310057fe3a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6163,8 +6163,8 @@ static void bnx2x_link_int_ack(struct link_params *params, static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len) { - str[0] = '\0'; - (*len)--; + if (*len) + str[0] = '\0'; return 0; } @@ -6173,7 +6173,7 @@ static int bnx2x_format_ver(u32 num, u8 *str, u16 *len) u16 ret; if (*len < 10) { - /* Need more than 10chars for this format */ + /* Need more than 10 chars for this format */ bnx2x_null_format_ver(num, str, len); return -EINVAL; } @@ -6188,8 +6188,8 @@ static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len) { u16 ret; - if (*len < 10) { - /* Need more than 10chars for this format */ + if (*len < 9) { + /* Need more than 9 chars for this format */ bnx2x_null_format_ver(num, str, len); return -EINVAL; } @@ -6208,7 +6208,7 @@ int bnx2x_get_ext_phy_fw_version(struct link_params *params, u8 *version, int status = 0; u8 *ver_p = version; u16 remain_len = len; - if (version == NULL || params == NULL) + if (version == NULL || params == NULL || len == 0) return -EINVAL; bp = params->bp; @@ -11546,7 +11546,7 @@ static int bnx2x_7101_format_ver(u32 spirom_ver, u8 *str, u16 *len) str[2] = (spirom_ver & 0xFF0000) >> 16; str[3] = (spirom_ver & 0xFF000000) >> 24; str[4] = '\0'; - *len -= 5; + *len -= 4; return 0; } From fd5fdacae93a00a0c80c9a190ee623e26cd8a0a3 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Sun, 21 Jan 2024 15:18:26 +0800 Subject: [PATCH 143/553] wifi: rtw89: pci: enlarge RX DMA buffer to consider size of RX descriptor [ Upstream commit c108b4a50dd7650941d4f4ec5c161655a73711db ] Hardware puts RX descriptor and packet in RX DMA buffer, so it could be over one buffer size if packet size is 11454, and then it will be split into two segments. WiFi 7 chips use larger size of RX descriptor, so enlarge DMA buffer size according to RX descriptor to have better performance and simple flow. Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://msgid.link/20240121071826.10159-5-pkshih@realtek.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw89/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index 179740607778..d982c5dc0889 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -546,7 +546,7 @@ #define RTW89_PCI_TXWD_NUM_MAX 512 #define RTW89_PCI_TXWD_PAGE_SIZE 128 #define RTW89_PCI_ADDRINFO_MAX 4 -#define RTW89_PCI_RX_BUF_SIZE 11460 +#define RTW89_PCI_RX_BUF_SIZE (11454 + 40) /* +40 for rtw89_rxdesc_long_v2 */ #define RTW89_PCI_POLL_BDRAM_RST_CNT 100 #define RTW89_PCI_MULTITAG 8 From feacd430b42bbfa9ab3ed9e4f38b86c43e348c75 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 5 Jan 2024 08:40:00 -0800 Subject: [PATCH 144/553] VMCI: Fix memcpy() run-time warning in dg_dispatch_as_host() [ Upstream commit 19b070fefd0d024af3daa7329cbc0d00de5302ec ] Syzkaller hit 'WARNING in dg_dispatch_as_host' bug. memcpy: detected field-spanning write (size 56) of single field "&dg_info->msg" at drivers/misc/vmw_vmci/vmci_datagram.c:237 (size 24) WARNING: CPU: 0 PID: 1555 at drivers/misc/vmw_vmci/vmci_datagram.c:237 dg_dispatch_as_host+0x88e/0xa60 drivers/misc/vmw_vmci/vmci_datagram.c:237 Some code commentry, based on my understanding: 544 #define VMCI_DG_SIZE(_dg) (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size) /// This is 24 + payload_size memcpy(&dg_info->msg, dg, dg_size); Destination = dg_info->msg ---> this is a 24 byte structure(struct vmci_datagram) Source = dg --> this is a 24 byte structure (struct vmci_datagram) Size = dg_size = 24 + payload_size {payload_size = 56-24 =32} -- Syzkaller managed to set payload_size to 32. 35 struct delayed_datagram_info { 36 struct datagram_entry *entry; 37 struct work_struct work; 38 bool in_dg_host_queue; 39 /* msg and msg_payload must be together. */ 40 struct vmci_datagram msg; 41 u8 msg_payload[]; 42 }; So those extra bytes of payload are copied into msg_payload[], a run time warning is seen while fuzzing with Syzkaller. One possible way to fix the warning is to split the memcpy() into two parts -- one -- direct assignment of msg and second taking care of payload. Gustavo quoted: "Under FORTIFY_SOURCE we should not copy data across multiple members in a structure." Reported-by: syzkaller Suggested-by: Vegard Nossum Suggested-by: Gustavo A. R. Silva Signed-off-by: Harshit Mogalapalli Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20240105164001.2129796-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/misc/vmw_vmci/vmci_datagram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index f50d22882476..d1d8224c8800 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -234,7 +234,8 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) dg_info->in_dg_host_queue = true; dg_info->entry = dst_entry; - memcpy(&dg_info->msg, dg, dg_size); + dg_info->msg = *dg; + memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); From 8d80e092027a82dd58a30fb46a98c075135ba1fd Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Mon, 29 Jan 2024 21:22:00 +0200 Subject: [PATCH 145/553] wifi: iwlwifi: pcie: Add the PCI device id for new hardware [ Upstream commit 6770eee75148ba10c0c051885379714773e00b48 ] Add the support for a new PCI device id. Signed-off-by: Mukesh Sisodiya Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240129211905.fde32107e0a3.I597cff4f340e4bed12b7568a0ad504bd4b2c1cf8@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 4d4db5f6836b..7f30e6add993 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -505,6 +505,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* Bz devices */ {IWL_PCI_DEVICE(0x2727, PCI_ANY_ID, iwl_bz_trans_cfg)}, + {IWL_PCI_DEVICE(0x272D, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x272b, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0xA840, PCI_ANY_ID, iwl_bz_trans_cfg)}, {IWL_PCI_DEVICE(0x7740, PCI_ANY_ID, iwl_bz_trans_cfg)}, From 94b016b28bdd417cf19f57da818bed28e7f3aaf1 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 7 Feb 2024 14:47:02 +0106 Subject: [PATCH 146/553] panic: Flush kernel log buffer at the end [ Upstream commit d988d9a9b9d180bfd5c1d353b3b176cb90d6861b ] If the kernel crashes in a context where printk() calls always defer printing (such as in NMI or inside a printk_safe section) then the final panic messages will be deferred to irq_work. But if irq_work is not available, the messages will not get printed unless explicitly flushed. The result is that the final "end Kernel panic" banner does not get printed. Add one final flush after the last printk() call to make sure the final panic messages make it out as well. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240207134103.1357162-14-john.ogness@linutronix.de Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/panic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 63e94f3bd8dc..e6c2bf04a32c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -441,6 +441,14 @@ void panic(const char *fmt, ...) /* Do not scroll important messages printed above */ suppress_printk = 1; + + /* + * The final messages may not have been printed if in a context that + * defers printing (such as NMI) and irq_work is not available. + * Explicitly flush the kernel log buffer one last time. + */ + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + local_irq_enable(); for (i = 0; ; i += PANIC_TIMER_STEP) { touch_softlockup_watchdog(); From b510fbe3a7bf26a6b8fcaa23d0275bb383f95dfd Mon Sep 17 00:00:00 2001 From: C Cheng Date: Tue, 19 Dec 2023 11:14:42 +0800 Subject: [PATCH 147/553] cpuidle: Avoid potential overflow in integer multiplication [ Upstream commit 88390dd788db485912ee7f9a8d3d56fc5265d52f ] In detail: In C language, when you perform a multiplication operation, if both operands are of int type, the multiplication operation is performed on the int type, and then the result is converted to the target type. This means that if the product of int type multiplication exceeds the range that int type can represent, an overflow will occur even if you store the result in a variable of int64_t type. For a multiplication of two int values, it is better to use mul_u32_u32() rather than s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC to avoid potential overflow happenning. Signed-off-by: C Cheng Signed-off-by: Bo Ye Reviewed-by: AngeloGioacchino Del Regno [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpuidle/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index f70aa17e2a8e..c594e28adddf 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -185,7 +186,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) s->target_residency_ns = 0; if (s->exit_latency > 0) - s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; + s->exit_latency_ns = mul_u32_u32(s->exit_latency, NSEC_PER_USEC); else if (s->exit_latency_ns < 0) s->exit_latency_ns = 0; } From a2920489347653d586d8710ed1d315ed5c2bef75 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 31 Jan 2024 22:17:08 +0100 Subject: [PATCH 148/553] arm64: dts: rockchip: fix rk3328 hdmi ports node [ Upstream commit 1d00ba4700d1e0f88ae70d028d2e17e39078fa1c ] Fix rk3328 hdmi ports node so that it matches the rockchip,dw-hdmi.yaml binding. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/e5dea3b7-bf84-4474-9530-cc2da3c41104@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 905a50aa5dc3..d42846efff2f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -741,11 +741,20 @@ status = "disabled"; ports { - hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + + hdmi_in: port@0 { + reg = <0>; + hdmi_in_vop: endpoint { remote-endpoint = <&vop_out_hdmi>; }; }; + + hdmi_out: port@1 { + reg = <1>; + }; }; }; From 6925d11fbda285c1b9613c43b30c3614a5552fc0 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 31 Jan 2024 22:17:31 +0100 Subject: [PATCH 149/553] arm64: dts: rockchip: fix rk3399 hdmi ports node [ Upstream commit f051b6ace7ffcc48d6d1017191f167c0a85799f6 ] Fix rk3399 hdmi ports node so that it matches the rockchip,dw-hdmi.yaml binding. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/a6ab6f75-3b80-40b1-bd30-3113e14becdd@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index a7e6eccb14cc..8363cc13ec51 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1906,6 +1906,7 @@ hdmi: hdmi@ff940000 { compatible = "rockchip,rk3399-dw-hdmi"; reg = <0x0 0xff940000 0x0 0x20000>; + reg-io-width = <4>; interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, @@ -1914,13 +1915,16 @@ <&cru PLL_VPLL>; clock-names = "iahb", "isfr", "cec", "grf", "ref"; power-domains = <&power RK3399_PD_HDCP>; - reg-io-width = <4>; rockchip,grf = <&grf>; #sound-dai-cells = <0>; status = "disabled"; ports { - hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + + hdmi_in: port@0 { + reg = <0>; #address-cells = <1>; #size-cells = <0>; @@ -1933,6 +1937,10 @@ remote-endpoint = <&vopl_out_hdmi>; }; }; + + hdmi_out: port@1 { + reg = <1>; + }; }; }; From b8ca15861c88cbc85e2fbc418dc829803b76a859 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 14 Feb 2024 09:59:01 -0800 Subject: [PATCH 150/553] ionic: set adminq irq affinity [ Upstream commit c699f35d658f3c21b69ed24e64b2ea26381e941d ] We claim to have the AdminQ on our irq0 and thus cpu id 0, but we need to be sure we set the affinity hint to try to keep it there. Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Jacob Keller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index fcc3faecb060..d33cf8ee7c33 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3216,9 +3216,12 @@ static int ionic_lif_adminq_init(struct ionic_lif *lif) napi_enable(&qcq->napi); - if (qcq->flags & IONIC_QCQ_F_INTR) + if (qcq->flags & IONIC_QCQ_F_INTR) { + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_CLEAR); + } qcq->flags |= IONIC_QCQ_F_INITED; From 8af60bb2b215f478b886f1d6d302fefa7f0b917d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Feb 2024 12:36:57 +0100 Subject: [PATCH 151/553] net: skbuff: add overflow debug check to pull/push helpers [ Upstream commit 219eee9c0d16f1b754a8b85275854ab17df0850a ] syzbot managed to trigger following splat: BUG: KASAN: use-after-free in __skb_flow_dissect+0x4a3b/0x5e50 Read of size 1 at addr ffff888208a4000e by task a.out/2313 [..] __skb_flow_dissect+0x4a3b/0x5e50 __skb_get_hash+0xb4/0x400 ip_tunnel_xmit+0x77e/0x26f0 ipip_tunnel_xmit+0x298/0x410 .. Analysis shows that the skb has a valid ->head, but bogus ->data pointer. skb->data gets its bogus value via the neigh layer, which does: 1556 __skb_pull(skb, skb_network_offset(skb)); ... and the skb was already dodgy at this point: skb_network_offset(skb) returns a negative value due to an earlier overflow of skb->network_header (u16). __skb_pull thus "adjusts" skb->data by a huge offset, pointing outside skb->head area. Allow debug builds to splat when we try to pull/push more than INT_MAX bytes. After this, the syzkaller reproducer yields a more precise splat before the flow dissector attempts to read off skb->data memory: WARNING: CPU: 5 PID: 2313 at include/linux/skbuff.h:2653 neigh_connected_output+0x28e/0x400 ip_finish_output2+0xb25/0xed0 iptunnel_xmit+0x4ff/0x870 ipgre_xmit+0x78e/0xbb0 Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240216113700.23013-1-fw@strlen.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c4a8520dc748..1326a935b6fa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2603,6 +2603,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val) void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->data -= len; skb->len += len; return skb->data; @@ -2611,6 +2613,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) @@ -2634,6 +2638,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + if (likely(len <= skb_headlen(skb))) return true; if (unlikely(len > skb->len)) From 161d6b803778c5a825d50bed14cfafe9b29e52ad Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 25 Dec 2023 20:03:56 +0100 Subject: [PATCH 152/553] firmware: tegra: bpmp: Return directly after a failed kzalloc() in get_filename() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1315848f1f8a0100cb6f8a7187bc320c5d98947f ] The kfree() function was called in one case by the get_filename() function during error handling even if the passed variable contained a null pointer. This issue was detected by using the Coccinelle software. Thus return directly after a call of the function “kzalloc” failed at the beginning. Signed-off-by: Markus Elfring Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/firmware/tegra/bpmp-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index 9d3874cdaaee..34e4152477f3 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -81,7 +81,7 @@ static const char *get_filename(struct tegra_bpmp *bpmp, root_path_buf = kzalloc(root_path_buf_len, GFP_KERNEL); if (!root_path_buf) - goto out; + return NULL; root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf, root_path_buf_len); From 8ead0a04a7f3dc1b475759599b31f450fa64ad3d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 22:36:49 +0100 Subject: [PATCH 153/553] wifi: brcmfmac: Add DMI nvram filename quirk for ACEPC W5 Pro [ Upstream commit 32167707aa5e7ae4b160c18be79d85a7b4fdfcfb ] The ACEPC W5 Pro HDMI stick contains quite generic names in the sys_vendor and product_name DMI strings, without this patch brcmfmac will try to load: "brcmfmac43455-sdio.$(DEFAULT_STRING)-$(DEFAULT_STRING).txt" as nvram file which is both too generic and messy with the $ symbols in the name. The ACEPC W5 Pro uses the same Ampak AP6255 module as the ACEPC T8 and the nvram for the T8 is already in linux-firmware, so point the new DMI nvram filename quirk to the T8 nvram file. Signed-off-by: Hans de Goede Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240216213649.251718-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 86ff174936a9..c3a602197662 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -82,6 +82,15 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&acepc_t8_data, }, + { + /* ACEPC W5 Pro Cherry Trail Z8350 HDMI stick, same wifi as the T8 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"), + DMI_MATCH(DMI_CHASSIS_TYPE, "3"), + DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + .driver_data = (void *)&acepc_t8_data, + }, { /* Chuwi Hi8 Pro with D2D3_Hi8Pro.233 BIOS */ .matches = { From 635594cca59f9d7a8e96187600c34facb8bc0682 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 18:02:06 +0800 Subject: [PATCH 154/553] pstore/zone: Add a null pointer check to the psz_kmsg_read [ Upstream commit 98bc7e26e14fbb26a6abf97603d59532475e97f8 ] kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118100206.213928-1-chentao@kylinos.cn Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- fs/pstore/zone.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 2770746bb7aa..abca117725c8 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -973,6 +973,8 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n", kmsg_dump_reason_str(record->reason), record->count); + if (!buf) + return -ENOMEM; hlen = strlen(buf); record->buf = krealloc(buf, hlen + size, GFP_KERNEL); if (!record->buf) { From 51a9b20a047dd9d8f5dda99d55d5c3e15f92e2a4 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Tue, 13 Feb 2024 16:19:56 -0800 Subject: [PATCH 155/553] tools/power x86_energy_perf_policy: Fix file leak in get_pkg_num() [ Upstream commit f85450f134f0b4ca7e042dc3dc89155656a2299d ] In function get_pkg_num() if fopen_or_die() succeeds it returns a file pointer to be used. But fclose() is never called before returning from the function. Signed-off-by: Samasth Norway Ananda Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 5fd9e594079c..ebda9c366b2b 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -1241,6 +1241,7 @@ unsigned int get_pkg_num(int cpu) retval = fscanf(fp, "%d\n", &pkg); if (retval != 1) errx(1, "%s: failed to parse", pathname); + fclose(fp); return pkg; } From a3f6045ce3c947093305c91889415cdca412c33c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 22 Feb 2024 20:58:22 +0300 Subject: [PATCH 156/553] net: pcs: xpcs: Return EINVAL in the internal methods [ Upstream commit f5151005d379d9ce42e327fd3b2d2aaef61cda81 ] In particular the xpcs_soft_reset() and xpcs_do_config() functions currently return -1 if invalid auto-negotiation mode is specified. That value might be then passed to the generic kernel subsystems which require a standard kernel errno value. Even though the erroneous conditions are very specific (memory corruption or buggy driver implementation) using a hard-coded -1 literal doesn't seem correct anyway especially when it comes to passing it higher to the network subsystem or printing to the system log. Convert the hard-coded error values to -EINVAL then. Signed-off-by: Serge Semin Tested-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/pcs/pcs-xpcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 3f882bce37f4..d126273daab4 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -262,7 +262,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs, dev = MDIO_MMD_VEND2; break; default: - return -1; + return -EINVAL; } ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET); @@ -904,7 +904,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, return ret; break; default: - return -1; + return -EINVAL; } if (compat->pma_config) { From 4e0cfb25d49da2e6261ad582f58ffa5b5dd8c8e9 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Wed, 21 Feb 2024 16:17:21 -0800 Subject: [PATCH 157/553] dma-direct: Leak pages on dma_set_decrypted() failure [ Upstream commit b9fa16949d18e06bdf728a560f5c8af56d2bdcaf ] On TDX it is possible for the untrusted host to cause set_memory_encrypted() or set_memory_decrypted() to fail such that an error is returned and the resulting memory is shared. Callers need to take care to handle these errors to avoid returning decrypted (shared) memory to the page allocator, which could lead to functional or security issues. DMA could free decrypted/shared pages if dma_set_decrypted() fails. This should be a rare case. Just leak the pages in this case instead of freeing them. Signed-off-by: Rick Edgecombe Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/direct.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 63859a101ed8..d4215739efc7 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -296,7 +296,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, } else { ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + goto out_leak_pages; } memset(ret, 0, size); @@ -317,6 +317,8 @@ out_encrypt_pages: out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; +out_leak_pages: + return NULL; } void dma_direct_free(struct device *dev, size_t size, @@ -379,12 +381,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + goto out_leak_pages; memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; -out_free_pages: - __dma_direct_free_pages(dev, page, size); +out_leak_pages: return NULL; } From 6597a6687af54e2cb58371cf8f6ee4dd85c537de Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 23 Feb 2024 13:31:11 +0800 Subject: [PATCH 158/553] wifi: ath11k: decrease MHI channel buffer length to 8KB [ Upstream commit 1cca1bddf9ef080503c15378cecf4877f7510015 ] Currently buf_len field of ath11k_mhi_config_qca6390 is assigned with 0, making MHI use a default size, 64KB, to allocate channel buffers. This is likely to fail in some scenarios where system memory is highly fragmented and memory compaction or reclaim is not allowed. There is a fail report which is caused by it: kworker/u32:45: page allocation failure: order:4, mode:0x40c00(GFP_NOIO|__GFP_COMP), nodemask=(null),cpuset=/,mems_allowed=0 CPU: 0 PID: 19318 Comm: kworker/u32:45 Not tainted 6.8.0-rc3-1.gae4495f-default #1 openSUSE Tumbleweed (unreleased) 493b6d5b382c603654d7a81fc3c144d59a1dfceb Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack_lvl+0x47/0x60 warn_alloc+0x13a/0x1b0 ? srso_alias_return_thunk+0x5/0xfbef5 ? __alloc_pages_direct_compact+0xab/0x210 __alloc_pages_slowpath.constprop.0+0xd3e/0xda0 __alloc_pages+0x32d/0x350 ? mhi_prepare_channel+0x127/0x2d0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] __kmalloc_large_node+0x72/0x110 __kmalloc+0x37c/0x480 ? mhi_map_single_no_bb+0x77/0xf0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] ? mhi_prepare_channel+0x127/0x2d0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] mhi_prepare_channel+0x127/0x2d0 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] __mhi_prepare_for_transfer+0x44/0x80 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] ? __pfx_____mhi_prepare_for_transfer+0x10/0x10 [mhi 40df44e07c05479f7a6e7b90fba9f0e0031a7814] device_for_each_child+0x5c/0xa0 ? __pfx_pci_pm_resume+0x10/0x10 ath11k_core_resume+0x65/0x100 [ath11k a5094e22d7223135c40d93c8f5321cf09fd85e4e] ? srso_alias_return_thunk+0x5/0xfbef5 ath11k_pci_pm_resume+0x32/0x60 [ath11k_pci 830b7bfc3ea80ebef32e563cafe2cb55e9cc73ec] ? srso_alias_return_thunk+0x5/0xfbef5 dpm_run_callback+0x8c/0x1e0 device_resume+0x104/0x340 ? __pfx_dpm_watchdog_handler+0x10/0x10 async_resume+0x1d/0x30 async_run_entry_fn+0x32/0x120 process_one_work+0x168/0x330 worker_thread+0x2f5/0x410 ? __pfx_worker_thread+0x10/0x10 kthread+0xe8/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Actually those buffers are used only by QMI target -> host communication. And for WCN6855 and QCA6390, the largest packet size for that is less than 6KB. So change buf_len field to 8KB, which results in order 1 allocation if page size is 4KB. In this way, we can at least save some memory, and as well as decrease the possibility of allocation failure in those scenarios. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.30 Reported-by: Vlastimil Babka Closes: https://lore.kernel.org/ath11k/96481a45-3547-4d23-ad34-3a8f1d90c1cd@suse.cz/ Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240223053111.29170-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index a62ee05c5409..4bea36cc7108 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -105,7 +105,7 @@ static struct mhi_controller_config ath11k_mhi_config_qca6390 = { .max_channels = 128, .timeout_ms = 2000, .use_bounce_buf = false, - .buf_len = 0, + .buf_len = 8192, .num_channels = ARRAY_SIZE(ath11k_mhi_channels_qca6390), .ch_cfg = ath11k_mhi_channels_qca6390, .num_events = ARRAY_SIZE(ath11k_mhi_events_qca6390), From 50bd749c60ceb5180f5bf9e37782b5afa04169b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 Feb 2024 13:42:07 +0530 Subject: [PATCH 159/553] cpufreq: Don't unregister cpufreq cooling on CPU hotplug [ Upstream commit c4d61a529db788d2e52654f5b02c8d1de4952c5b ] Offlining a CPU and bringing it back online is a common operation and it happens frequently during system suspend/resume, where the non-boot CPUs are hotplugged out during suspend and brought back at resume. The cpufreq core already tries to make this path as fast as possible as the changes are only temporary in nature and full cleanup of resources isn't required in this case. For example the drivers can implement online()/offline() callbacks to avoid a lot of tear down of resources. On similar lines, there is no need to unregister the cpufreq cooling device during suspend / resume, but only while the policy is getting removed. Moreover, unregistering the cpufreq cooling device is resulting in an unwanted outcome, where the system suspend is eventually aborted in the process. Currently, during system suspend the cpufreq core unregisters the cooling device, which in turn removes a kobject using device_del() and that generates a notification to the userspace via uevent broadcast. This causes system suspend to abort in some setups. This was also earlier reported (indirectly) by Roman [1]. Maybe there is another way around to fixing that problem properly, but this change makes sense anyways. Move the registering and unregistering of the cooling device to policy creation and removal times onlyy. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218521 Reported-by: Manaf Meethalavalappu Pallikunhi Reported-by: Roman Stratiienko Link: https://patchwork.kernel.org/project/linux-pm/patch/20220710164026.541466-1-r.stratiienko@gmail.com/ [1] Tested-by: Manaf Meethalavalappu Pallikunhi Signed-off-by: Viresh Kumar Reviewed-by: Dhruva Gole Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c8912756fc06..91efa23e0e8f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1525,7 +1525,8 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); - if (cpufreq_thermal_control_enabled(cpufreq_driver)) + /* Register cpufreq cooling only for a new policy */ + if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); pr_debug("initialization complete\n"); @@ -1609,11 +1610,6 @@ static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) else policy->last_policy = policy->policy; - if (cpufreq_thermal_control_enabled(cpufreq_driver)) { - cpufreq_cooling_unregister(policy->cdev); - policy->cdev = NULL; - } - if (has_target()) cpufreq_exit_governor(policy); @@ -1674,6 +1670,15 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return; } + /* + * Unregister cpufreq cooling once all the CPUs of the policy are + * removed. + */ + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } + /* We did light-weight exit earlier, do full tear down now */ if (cpufreq_driver->offline) cpufreq_driver->exit(policy); From 36c2a2863bc3896243eb724dc3fd4cf9aea633f2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 23 Jan 2024 23:42:29 +0100 Subject: [PATCH 160/553] btrfs: handle chunk tree lookup error in btrfs_relocate_sys_chunks() [ Upstream commit 7411055db5ce64f836aaffd422396af0075fdc99 ] The unhandled case in btrfs_relocate_sys_chunks() loop is a corruption, as it could be caused only by two impossible conditions: - at first the search key is set up to look for a chunk tree item, with offset -1, this is an inexact search and the key->offset will contain the correct offset upon a successful search, a valid chunk tree item cannot have an offset -1 - after first successful search, the found_key corresponds to a chunk item, the offset is decremented by 1 before the next loop, it's impossible to find a chunk item there due to alignment and size constraints Reviewed-by: Josef Bacik Reviewed-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 03cfb425ea4e..ab5d410d560e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3381,7 +3381,17 @@ again: mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } - BUG_ON(ret == 0); /* Corruption */ + if (ret == 0) { + /* + * On the first search we would find chunk tree with + * offset -1, which is not possible. On subsequent + * loops this would find an existing item on an invalid + * offset (one less than the previous one, wrong + * alignment and size). + */ + ret = -EUCLEAN; + goto error; + } ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); From 0f30f95b918eb63c4ba6c687d843683d3850bf3a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 19 Jan 2024 21:19:18 +0100 Subject: [PATCH 161/553] btrfs: export: handle invalid inode or root reference in btrfs_get_parent() [ Upstream commit 26b66d1d366a375745755ca7365f67110bbf6bd5 ] The get_parent handler looks up a parent of a given dentry, this can be either a subvolume or a directory. The search is set up with offset -1 but it's never expected to find such item, as it would break allowed range of inode number or a root id. This means it's a corruption (ext4 also returns this error code). Reviewed-by: Josef Bacik Reviewed-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/export.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index fab7eb76e53b..58b0f04d7123 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -161,8 +161,15 @@ struct dentry *btrfs_get_parent(struct dentry *child) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto fail; + if (ret == 0) { + /* + * Key with offset of -1 found, there would have to exist an + * inode with such number or a root with such id. + */ + ret = -EUCLEAN; + goto fail; + } - BUG_ON(ret == 0); /* Key with offset of -1 found */ if (path->slots[0] == 0) { ret = -ENOENT; goto fail; From 9ae356c627b493323e1433dcb27a26917668c07c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 6 Feb 2024 22:47:13 +0100 Subject: [PATCH 162/553] btrfs: send: handle path ref underflow in header iterate_inode_ref() [ Upstream commit 3c6ee34c6f9cd12802326da26631232a61743501 ] Change BUG_ON to proper error handling if building the path buffer fails. The pointers are not printed so we don't accidentally leak kernel addresses. Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/send.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9f7ffd9ef6fd..754a9fb0165f 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1015,7 +1015,15 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, ret = PTR_ERR(start); goto out; } - BUG_ON(start < p->buf); + if (unlikely(start < p->buf)) { + btrfs_err(root->fs_info, + "send: path ref buffer underflow for key (%llu %u %llu)", + found_key->objectid, + found_key->type, + found_key->offset); + ret = -EINVAL; + goto out; + } } p->start = start; } else { From 5693dd6d3d01f0eea24401f815c98b64cb315b67 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 16 Feb 2024 14:06:37 -0800 Subject: [PATCH 163/553] ice: use relative VSI index for VFs instead of PF VSI number [ Upstream commit 11fbb1bfb5bc8c98b2d7db9da332b5e568f4aaab ] When initializing over virtchnl, the PF is required to pass a VSI ID to the VF as part of its capabilities exchange. The VF driver reports this value back to the PF in a variety of commands. The PF driver validates that this value matches the value it sent to the VF. Some hardware families such as the E700 series could use this value when reading RSS registers or communicating directly with firmware over the Admin Queue. However, E800 series hardware does not support any of these interfaces and the VF's only use for this value is to report it back to the PF. Thus, there is no requirement that this value be an actual VSI ID value of any kind. The PF driver already does not trust that the VF sends it a real VSI ID. The VSI structure is always looked up from the VF structure. The PF does validate that the VSI ID provided matches a VSI associated with the VF, but otherwise does not use the VSI ID for any purpose. Instead of reporting the VSI number relative to the PF space, report a fixed value of 1. When communicating with the VF over virtchnl, validate that the VSI number is returned appropriately. This avoids leaking information about the firmware of the PF state. Currently the ice driver only supplies a VF with a single VSI. However, it appears that virtchnl has some support for allowing multiple VSIs. I did not attempt to implement this. However, space is left open to allow further relative indexes if additional VSIs are provided in future feature development. For this reason, keep the ice_vc_isvalid_vsi_id function in place to allow extending it for multiple VSIs in the future. This change will also simplify handling of live migration in a future series. Since we no longer will provide a real VSI number to the VF, there will be no need to keep track of this number when migrating to a new host. Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 ++------- drivers/net/ethernet/intel/ice/ice_virtchnl.h | 9 +++++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 4b71392f60df..e64bef490a17 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -493,7 +493,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vfres->max_mtu = ice_vc_get_max_frame_size(vf); - vfres->vsi_res[0].vsi_id = vf->lan_vsi_num; + vfres->vsi_res[0].vsi_id = ICE_VF_VSI_ID; vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; vfres->vsi_res[0].num_queue_pairs = vsi->num_txq; ether_addr_copy(vfres->vsi_res[0].default_mac_addr, @@ -539,12 +539,7 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf) */ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) { - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = ice_find_vsi(pf, vsi_id); - - return (vsi && (vsi->vf == vf)); + return vsi_id == ICE_VF_VSI_ID; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index b5a3fd8adbb4..6073d3b2d2d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -18,6 +18,15 @@ */ #define ICE_MAX_MACADDR_PER_VF 18 +/* VFs only get a single VSI. For ice hardware, the VF does not need to know + * its VSI index. However, the virtchnl interface requires a VSI number, + * mainly due to legacy hardware. + * + * Since the VF doesn't need this information, report a static value to the VF + * instead of leaking any information about the PF or hardware setup. + */ +#define ICE_VF_VSI_ID 1 + struct ice_virtchnl_ops { int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg); From d7ee3bf0caf599c14db0bf4af7aacd6206ef8a23 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 Mar 2024 10:07:44 +0000 Subject: [PATCH 164/553] net/smc: reduce rtnl pressure in smc_pnet_create_pnetids_list() [ Upstream commit 00af2aa93b76b1bade471ad0d0525d4d29ca5cc0 ] Many syzbot reports show extreme rtnl pressure, and many of them hint that smc acquires rtnl in netns creation for no good reason [1] This patch returns early from smc_pnet_net_init() if there is no netdevice yet. I am not even sure why smc_pnet_create_pnetids_list() even exists, because smc_pnet_netdev_event() is also calling smc_pnet_add_base_pnetid() when handling NETDEV_UP event. [1] extract of typical syzbot reports 2 locks held by syz-executor.3/12252: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.4/12253: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.1/12257: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.2/12261: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.0/12265: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.3/12268: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.4/12271: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.1/12274: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.2/12280: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 Signed-off-by: Eric Dumazet Cc: Wenjia Zhang Cc: Jan Karcher Cc: "D. Wythe" Cc: Tony Lu Cc: Wen Gu Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20240302100744.3868021-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/smc/smc_pnet.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 25fb2fd186e2..21b8bf23e4ee 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -802,6 +802,16 @@ static void smc_pnet_create_pnetids_list(struct net *net) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; + /* Newly created netns do not have devices. + * Do not even acquire rtnl. + */ + if (list_empty(&net->dev_base_head)) + return; + + /* Note: This might not be needed, because smc_pnet_netdev_event() + * is also calling smc_pnet_add_base_pnetid() when handling + * NETDEV_UP event. + */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); From b19fe5eea619d54eea59bb8a37c0f8d00ef0e912 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Thu, 18 Jan 2024 12:40:34 +0800 Subject: [PATCH 165/553] Bluetooth: btintel: Fix null ptr deref in btintel_read_version [ Upstream commit b79e040910101b020931ba0c9a6b77e81ab7f645 ] If hci_cmd_sync_complete() is triggered and skb is NULL, then hdev->req_skb is NULL, which will cause this issue. Reported-and-tested-by: syzbot+830d9e3fa61968246abd@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btintel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index bbad1207cdfd..c77c06b84d86 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -405,7 +405,7 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver) struct sk_buff *skb; skb = __hci_cmd_sync(hdev, 0xfc05, 0, NULL, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) { + if (IS_ERR_OR_NULL(skb)) { bt_dev_err(hdev, "Reading Intel version information failed (%ld)", PTR_ERR(skb)); return PTR_ERR(skb); From a83a7728e4871b69e40ccc255e075700820c87bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Feb 2024 11:29:14 +0100 Subject: [PATCH 166/553] Bluetooth: btmtk: Add MODULE_FIRMWARE() for MT7922 [ Upstream commit 3e465a07cdf444140f16bc57025c23fcafdde997 ] Since dracut refers to the module info for defining the required firmware files and btmtk driver doesn't provide the firmware info for MT7922, the generate initrd misses the firmware, resulting in the broken Bluetooth. This patch simply adds the MODULE_FIRMWARE() for the missing entry for covering that. Link: https://bugzilla.suse.com/show_bug.cgi?id=1214133 Signed-off-by: Takashi Iwai Reviewed-by: Paul Menzel Reviewed-by: Matthias Brugger Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btmtk.c | 1 + drivers/bluetooth/btmtk.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 809762d64fc6..b77e337778a4 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -288,4 +288,5 @@ MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_MT7622); MODULE_FIRMWARE(FIRMWARE_MT7663); MODULE_FIRMWARE(FIRMWARE_MT7668); +MODULE_FIRMWARE(FIRMWARE_MT7922); MODULE_FIRMWARE(FIRMWARE_MT7961); diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index 2a88ea8e475e..ee0b1d27aa5c 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -4,6 +4,7 @@ #define FIRMWARE_MT7622 "mediatek/mt7622pr2h.bin" #define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin" #define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin" +#define FIRMWARE_MT7922 "mediatek/BT_RAM_CODE_MT7922_1_1_hdr.bin" #define FIRMWARE_MT7961 "mediatek/BT_RAM_CODE_MT7961_1_2_hdr.bin" #define HCI_EV_WMT 0xe4 From 48bfb4b03c5ff6e1fa1dc73fb915e150b0968c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 5 Jan 2024 14:58:36 -0300 Subject: [PATCH 167/553] drm/vc4: don't check if plane->state->fb == state->fb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5ee0d47dcf33efd8950b347dcf4d20bab12a3fa9 ] Currently, when using non-blocking commits, we can see the following kernel warning: [ 110.908514] ------------[ cut here ]------------ [ 110.908529] refcount_t: underflow; use-after-free. [ 110.908620] WARNING: CPU: 0 PID: 1866 at lib/refcount.c:87 refcount_dec_not_one+0xb8/0xc0 [ 110.908664] Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device cmac algif_hash aes_arm64 aes_generic algif_skcipher af_alg bnep hid_logitech_hidpp vc4 brcmfmac hci_uart btbcm brcmutil bluetooth snd_soc_hdmi_codec cfg80211 cec drm_display_helper drm_dma_helper drm_kms_helper snd_soc_core snd_compress snd_pcm_dmaengine fb_sys_fops sysimgblt syscopyarea sysfillrect raspberrypi_hwmon ecdh_generic ecc rfkill libaes i2c_bcm2835 binfmt_misc joydev snd_bcm2835(C) bcm2835_codec(C) bcm2835_isp(C) v4l2_mem2mem videobuf2_dma_contig snd_pcm bcm2835_v4l2(C) raspberrypi_gpiomem bcm2835_mmal_vchiq(C) videobuf2_v4l2 snd_timer videobuf2_vmalloc videobuf2_memops videobuf2_common snd videodev vc_sm_cma(C) mc hid_logitech_dj uio_pdrv_genirq uio i2c_dev drm fuse dm_mod drm_panel_orientation_quirks backlight ip_tables x_tables ipv6 [ 110.909086] CPU: 0 PID: 1866 Comm: kodi.bin Tainted: G C 6.1.66-v8+ #32 [ 110.909104] Hardware name: Raspberry Pi 3 Model B Rev 1.2 (DT) [ 110.909114] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 110.909132] pc : refcount_dec_not_one+0xb8/0xc0 [ 110.909152] lr : refcount_dec_not_one+0xb4/0xc0 [ 110.909170] sp : ffffffc00913b9c0 [ 110.909177] x29: ffffffc00913b9c0 x28: 000000556969bbb0 x27: 000000556990df60 [ 110.909205] x26: 0000000000000002 x25: 0000000000000004 x24: ffffff8004448480 [ 110.909230] x23: ffffff800570b500 x22: ffffff802e03a7bc x21: ffffffecfca68c78 [ 110.909257] x20: ffffff8002b42000 x19: ffffff802e03a600 x18: 0000000000000000 [ 110.909283] x17: 0000000000000011 x16: ffffffffffffffff x15: 0000000000000004 [ 110.909308] x14: 0000000000000fff x13: ffffffed577e47e0 x12: 0000000000000003 [ 110.909333] x11: 0000000000000000 x10: 0000000000000027 x9 : c912d0d083728c00 [ 110.909359] x8 : c912d0d083728c00 x7 : 65646e75203a745f x6 : 746e756f63666572 [ 110.909384] x5 : ffffffed579f62ee x4 : ffffffed579eb01e x3 : 0000000000000000 [ 110.909409] x2 : 0000000000000000 x1 : ffffffc00913b750 x0 : 0000000000000001 [ 110.909434] Call trace: [ 110.909441] refcount_dec_not_one+0xb8/0xc0 [ 110.909461] vc4_bo_dec_usecnt+0x4c/0x1b0 [vc4] [ 110.909903] vc4_cleanup_fb+0x44/0x50 [vc4] [ 110.910315] drm_atomic_helper_cleanup_planes+0x88/0xa4 [drm_kms_helper] [ 110.910669] vc4_atomic_commit_tail+0x390/0x9dc [vc4] [ 110.911079] commit_tail+0xb0/0x164 [drm_kms_helper] [ 110.911397] drm_atomic_helper_commit+0x1d0/0x1f0 [drm_kms_helper] [ 110.911716] drm_atomic_commit+0xb0/0xdc [drm] [ 110.912569] drm_mode_atomic_ioctl+0x348/0x4b8 [drm] [ 110.913330] drm_ioctl_kernel+0xec/0x15c [drm] [ 110.914091] drm_ioctl+0x24c/0x3b0 [drm] [ 110.914850] __arm64_sys_ioctl+0x9c/0xd4 [ 110.914873] invoke_syscall+0x4c/0x114 [ 110.914897] el0_svc_common+0xd0/0x118 [ 110.914917] do_el0_svc+0x38/0xd0 [ 110.914936] el0_svc+0x30/0x8c [ 110.914958] el0t_64_sync_handler+0x84/0xf0 [ 110.914979] el0t_64_sync+0x18c/0x190 [ 110.914996] ---[ end trace 0000000000000000 ]--- This happens because, although `prepare_fb` and `cleanup_fb` are perfectly balanced, we cannot guarantee consistency in the check plane->state->fb == state->fb. This means that sometimes we can increase the refcount in `prepare_fb` and don't decrease it in `cleanup_fb`. The opposite can also be true. In fact, the struct drm_plane .state shouldn't be accessed directly but instead, the `drm_atomic_get_new_plane_state()` helper function should be used. So, we could stick to this check, but using `drm_atomic_get_new_plane_state()`. But actually, this check is not really needed. We can increase and decrease the refcount symmetrically without problems. This is going to make the code more simple and consistent. Signed-off-by: Maíra Canal Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240105175908.242000-1-mcanal@igalia.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_plane.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index eb08020154f3..7e6648b277b2 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -1415,9 +1415,6 @@ static int vc4_prepare_fb(struct drm_plane *plane, drm_gem_plane_helper_prepare_fb(plane, state); - if (plane->state->fb == state->fb) - return 0; - return vc4_bo_inc_usecnt(bo); } @@ -1426,7 +1423,7 @@ static void vc4_cleanup_fb(struct drm_plane *plane, { struct vc4_bo *bo; - if (plane->state->fb == state->fb || !state->fb) + if (!state->fb) return; bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base); From ac1e0f080a768e085c35878397e7caeafc48b47d Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:37:59 -0800 Subject: [PATCH 168/553] Input: synaptics-rmi4 - fail probing if memory allocation for "phys" fails [ Upstream commit bc4996184d56cfaf56d3811ac2680c8a0e2af56e ] While input core can work with input->phys set to NULL userspace might depend on it, so better fail probing if allocation fails. The system must be in a pretty bad shape for it to happen anyway. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240117073124.143636-1-chentao@kylinos.cn Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/rmi4/rmi_driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d395..aa32371f04af 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -1196,7 +1196,11 @@ static int rmi_driver_probe(struct device *dev) } rmi_driver_set_input_params(rmi_dev, data->input); data->input->phys = devm_kasprintf(dev, GFP_KERNEL, - "%s/input0", dev_name(dev)); + "%s/input0", dev_name(dev)); + if (!data->input->phys) { + retval = -ENOMEM; + goto err; + } } retval = rmi_init_functions(data); From c87dd159189a582906ba6a7302d966ac56b80f13 Mon Sep 17 00:00:00 2001 From: Samuel Dionne-Riel Date: Thu, 21 Dec 2023 22:01:50 -0500 Subject: [PATCH 169/553] drm: panel-orientation-quirks: Add quirk for GPD Win Mini [ Upstream commit 2f862fdc0fd802e728b6ca96bc78ec3f01bf161e ] This adds a DMI orientation quirk for the GPD Win Mini panel. Signed-off-by: Samuel Dionne-Riel Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20231222030149.3740815-2-samuel@dionne-riel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index d5c15292ae93..3fe5e6439c40 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -117,6 +117,12 @@ static const struct drm_dmi_panel_orientation_data lcd1080x1920_leftside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1080x1920_rightside_up = { + .width = 1080, + .height = 1920, + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, +}; + static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = { .width = 1200, .height = 1920, @@ -279,6 +285,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1618-03") }, .driver_data = (void *)&lcd720x1280_rightside_up, + }, { /* GPD Win Mini */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1617-01") + }, + .driver_data = (void *)&lcd1080x1920_rightside_up, }, { /* I.T.Works TW891 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), From 5238e1c2bd3142c10fd41c16891244b20b928894 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Jan 2024 14:43:38 +0100 Subject: [PATCH 170/553] pinctrl: renesas: checker: Limit cfg reg enum checks to provided IDs [ Upstream commit 3803584a4e9b65bb5b013f862f55c5055aa86c25 ] If the number of provided enum IDs in a variable width config register description does not match the expected number, the checker uses the expected number for validating the individual enum IDs. However, this may cause out-of-bounds accesses on the array holding the enum IDs, leading to bogus enum_id conflict warnings. Worse, if the bug is an incorrect bit field description (e.g. accidentally using "12" instead of "-12" for a reserved field), thousands of warnings may be printed, overflowing the kernel log buffer. Fix this by limiting the enum ID check to the number of provided enum IDs. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/c7385f44f2faebb8856bcbb4e908d846fc1531fb.1705930809.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- drivers/pinctrl/renesas/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index c91102d3f1d1..1c7f8caf7f7c 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -921,9 +921,11 @@ static void __init sh_pfc_check_cfg_reg(const char *drvname, sh_pfc_err("reg 0x%x: var_field_width declares %u instead of %u bits\n", cfg_reg->reg, rw, cfg_reg->reg_width); - if (n != cfg_reg->nr_enum_ids) + if (n != cfg_reg->nr_enum_ids) { sh_pfc_err("reg 0x%x: enum_ids[] has %u instead of %u values\n", cfg_reg->reg, cfg_reg->nr_enum_ids, n); + n = cfg_reg->nr_enum_ids; + } check_enum_ids: sh_pfc_check_reg_enums(drvname, cfg_reg->reg, cfg_reg->enum_ids, n); From 53cb1e52c9db618c08335984d1ca80db220ccf09 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 10 Apr 2023 21:04:50 +0900 Subject: [PATCH 171/553] sysv: don't call sb_bread() with pointers_lock held [ Upstream commit f123dc86388cb669c3d6322702dc441abc35c31e ] syzbot is reporting sleep in atomic context in SysV filesystem [1], for sb_bread() is called with rw_spinlock held. A "write_lock(&pointers_lock) => read_lock(&pointers_lock) deadlock" bug and a "sb_bread() with write_lock(&pointers_lock)" bug were introduced by "Replace BKL for chain locking with sysvfs-private rwlock" in Linux 2.5.12. Then, "[PATCH] err1-40: sysvfs locking fix" in Linux 2.6.8 fixed the former bug by moving pointers_lock lock to the callers, but instead introduced a "sb_bread() with read_lock(&pointers_lock)" bug (which made this problem easier to hit). Al Viro suggested that why not to do like get_branch()/get_block()/ find_shared() in Minix filesystem does. And doing like that is almost a revert of "[PATCH] err1-40: sysvfs locking fix" except that get_branch() from with find_shared() is called without write_lock(&pointers_lock). Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=69b40dc5fd40f32c199f Suggested-by: Al Viro Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/0d195f93-a22a-49a2-0020-103534d6f7f6@I-love.SAKURA.ne.jp Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/sysv/itree.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 9925cfe57159..17c7d76770a0 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -82,9 +82,6 @@ static inline sysv_zone_t *block_end(struct buffer_head *bh) return (sysv_zone_t*)((char*)bh->b_data + bh->b_size); } -/* - * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock) - */ static Indirect *get_branch(struct inode *inode, int depth, int offsets[], @@ -104,15 +101,18 @@ static Indirect *get_branch(struct inode *inode, bh = sb_bread(sb, block); if (!bh) goto failure; + read_lock(&pointers_lock); if (!verify_chain(chain, p)) goto changed; add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets); + read_unlock(&pointers_lock); if (!p->key) goto no_block; } return NULL; changed: + read_unlock(&pointers_lock); brelse(bh); *err = -EAGAIN; goto no_block; @@ -218,9 +218,7 @@ static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *b goto out; reread: - read_lock(&pointers_lock); partial = get_branch(inode, depth, offsets, chain, &err); - read_unlock(&pointers_lock); /* Simplest case - block found, no allocation needed */ if (!partial) { @@ -290,9 +288,9 @@ static Indirect *find_shared(struct inode *inode, *top = 0; for (k = depth; k > 1 && !offsets[k-1]; k--) ; + partial = get_branch(inode, k, offsets, chain, &err); write_lock(&pointers_lock); - partial = get_branch(inode, k, offsets, chain, &err); if (!partial) partial = chain + k-1; /* From ee0b5f96b6d66a1e6698228dcb41df11ec7f352f Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Wed, 31 Jan 2024 10:50:57 -0800 Subject: [PATCH 172/553] scsi: lpfc: Fix possible memory leak in lpfc_rcv_padisc() [ Upstream commit 2ae917d4bcab80ab304b774d492e2fcd6c52c06b ] The call to lpfc_sli4_resume_rpi() in lpfc_rcv_padisc() may return an unsuccessful status. In such cases, the elsiocb is not issued, the completion is not called, and thus the elsiocb resource is leaked. Check return value after calling lpfc_sli4_resume_rpi() and conditionally release the elsiocb resource. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20240131185112.149731-3-justintee8345@gmail.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nportdisc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index b86ff9fcdf0c..f21396a0ba9d 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -748,8 +748,10 @@ lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Save the ELS cmd */ elsiocb->drvrTimeout = cmd; - lpfc_sli4_resume_rpi(ndlp, - lpfc_mbx_cmpl_resume_rpi, elsiocb); + if (lpfc_sli4_resume_rpi(ndlp, + lpfc_mbx_cmpl_resume_rpi, + elsiocb)) + kfree(elsiocb); goto out; } } From 13701b0f01b6524cfd405bca7ed7587859fd57fa Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Wed, 7 Feb 2024 19:21:32 -0700 Subject: [PATCH 173/553] isofs: handle CDs with bad root inode but good Joliet root directory [ Upstream commit 4243bf80c79211a8ca2795401add9c4a3b1d37ca ] I have a CD copy of the original Tom Clancy's Ghost Recon game from 2001. The disc mounts without error on Windows, but on Linux mounting fails with the message "isofs_fill_super: get root inode failed". The error originates in isofs_read_inode, which returns -EIO because de_len is 0. The superblock on this disc appears to be intentionally corrupt as a form of copy protection. When the root inode is unusable, instead of giving up immediately, try to continue with the Joliet file table. This fixes the Ghost Recon CD and probably other copy-protected CDs too. Signed-off-by: Alex Henrie Signed-off-by: Jan Kara Message-Id: <20240208022134.451490-1-alexhenrie24@gmail.com> Signed-off-by: Sasha Levin --- fs/isofs/inode.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index df9d70588b60..8a6c7fdc1d5f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -908,8 +908,22 @@ root_found: * we then decide whether to use the Joliet descriptor. */ inode = isofs_iget(s, sbi->s_firstdatazone, 0); - if (IS_ERR(inode)) - goto out_no_root; + + /* + * Fix for broken CDs with a corrupt root inode but a correct Joliet + * root directory. + */ + if (IS_ERR(inode)) { + if (joliet_level && sbi->s_firstdatazone != first_data_zone) { + printk(KERN_NOTICE + "ISOFS: root inode is unusable. " + "Disabling Rock Ridge and switching to Joliet."); + sbi->s_rock = 0; + inode = NULL; + } else { + goto out_no_root; + } + } /* * Fix for broken CDs with Rock Ridge and empty ISO root directory but From eae948ecd5f019b65e78ed9e6f75945cf4e793e3 Mon Sep 17 00:00:00 2001 From: mosomate Date: Thu, 8 Feb 2024 10:55:40 -0600 Subject: [PATCH 174/553] ASoC: Intel: common: DMI remap for rebranded Intel NUC M15 (LAPRC710) laptops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c13e03126a5be90781084437689724254c8226e1 ] Added DMI quirk to handle the rebranded variants of Intel NUC M15 (LAPRC710) laptops. The DMI matching is based on motherboard attributes. Link: https://github.com/thesofproject/linux/issues/4218 Signed-off-by: Máté Mosonyi Reviewed-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240208165545.93811-20-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/soundwire/dmi-quirks.c | 8 ++++++++ sound/soc/intel/boards/sof_sdw.c | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index 9ebdd0cd0b1c..91ab97a456fa 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -130,6 +130,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { }, .driver_data = (void *)intel_rooks_county, }, + { + /* quirk used for NUC15 LAPRC710 skew */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "LAPRC710"), + }, + .driver_data = (void *)intel_rooks_county, + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 985012f2003e..d1e6e4208c37 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -224,6 +224,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_SDW_PCH_DMIC | RT711_JD2_100K), }, + { + /* NUC15 LAPRC710 skews */ + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "LAPRC710"), + }, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | + RT711_JD2_100K), + }, /* TigerLake-SDCA devices */ { .callback = sof_sdw_quirk_cb, From 3f3c1e735d3e9169df46f724eeb41a0c0e458051 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Dec 2023 09:33:29 -0800 Subject: [PATCH 175/553] rcu-tasks: Repair RCU Tasks Trace quiescence check [ Upstream commit 2eb52fa8900e642b3b5054c4bf9776089d2a935f ] The context-switch-time check for RCU Tasks Trace quiescence expects current->trc_reader_special.b.need_qs to be zero, and if so, updates it to TRC_NEED_QS_CHECKED. This is backwards, because if this value is zero, there is no RCU Tasks Trace grace period in flight, an thus no need for a quiescent state. Instead, when a grace period starts, this field is set to TRC_NEED_QS. This commit therefore changes the check from zero to TRC_NEED_QS. Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Tested-by: Steven Rostedt (Google) Signed-off-by: Boqun Feng Signed-off-by: Sasha Levin --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 319698087d66..6858cae98da9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -205,9 +205,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ - if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ From 2e2177f94c0e0bc41323d7b6975a5f4820ed347e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 14 Feb 2024 15:57:53 -0500 Subject: [PATCH 176/553] Julia Lawall reported this null pointer dereference, this should fix it. [ Upstream commit 9bf93dcfc453fae192fe5d7874b89699e8f800ac ] Signed-off-by: Mike Marshall Signed-off-by: Sasha Levin --- fs/orangefs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 5254256a224d..4ca8ed410c3c 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -527,7 +527,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); if (!ORANGEFS_SB(sb)) { d = ERR_PTR(-ENOMEM); - goto free_sb_and_op; + goto free_op; } ret = orangefs_fill_sb(sb, From 84ed33a08218582ecda3c82d93d1efa9aadf7770 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:54:47 +0100 Subject: [PATCH 177/553] media: sta2x11: fix irq handler cast [ Upstream commit 3de49ae81c3a0f83a554ecbce4c08e019f30168e ] clang-16 warns about casting incompatible function pointers: drivers/media/pci/sta2x11/sta2x11_vip.c:1057:6: error: cast from 'irqreturn_t (*)(int, struct sta2x11_vip *)' (aka 'enum irqreturn (*)(int, struct sta2x11_vip *)') to 'irq_handler_t' (aka 'enum irqreturn (*)(int, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] Change the prototype of the irq handler to the regular version with a local variable to adjust the argument type. Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil [hverkuil: update argument documentation] Signed-off-by: Sasha Levin --- drivers/media/pci/sta2x11/sta2x11_vip.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c index 8535e49a4c4f..1f7ab56de4a0 100644 --- a/drivers/media/pci/sta2x11/sta2x11_vip.c +++ b/drivers/media/pci/sta2x11/sta2x11_vip.c @@ -756,7 +756,7 @@ static const struct video_device video_dev_template = { /** * vip_irq - interrupt routine * @irq: Number of interrupt ( not used, correct number is assumed ) - * @vip: local data structure containing all information + * @data: local data structure containing all information * * check for both frame interrupts set ( top and bottom ). * check FIFO overflow, but limit number of log messages after open. @@ -766,8 +766,9 @@ static const struct video_device video_dev_template = { * * IRQ_HANDLED, interrupt done. */ -static irqreturn_t vip_irq(int irq, struct sta2x11_vip *vip) +static irqreturn_t vip_irq(int irq, void *data) { + struct sta2x11_vip *vip = data; unsigned int status; status = reg_read(vip, DVP_ITS); @@ -1049,9 +1050,7 @@ static int sta2x11_vip_init_one(struct pci_dev *pdev, spin_lock_init(&vip->slock); - ret = request_irq(pdev->irq, - (irq_handler_t) vip_irq, - IRQF_SHARED, KBUILD_MODNAME, vip); + ret = request_irq(pdev->irq, vip_irq, IRQF_SHARED, KBUILD_MODNAME, vip); if (ret) { dev_err(&pdev->dev, "request_irq failed\n"); ret = -ENODEV; From b75395ec4b7a63840cd7c7071158f70d508a5965 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 18 Feb 2024 16:41:27 +0900 Subject: [PATCH 178/553] ALSA: firewire-lib: handle quirk to calculate payload quadlets as data block counter [ Upstream commit 4a486439d2ca85752c46711f373b6ddc107bb35d ] Miglia Harmony Audio (OXFW970) has a quirk to put the number of accumulated quadlets in CIP payload into the dbc field of CIP header. This commit handles the quirk in the packet processing layer. Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20240218074128.95210-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/amdtp-stream.c | 12 ++++++++---- sound/firewire/amdtp-stream.h | 4 ++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index f8b644cb9157..875312568369 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -771,10 +771,14 @@ static int check_cip_header(struct amdtp_stream *s, const __be32 *buf, } else { unsigned int dbc_interval; - if (*data_blocks > 0 && s->ctx_data.tx.dbc_interval > 0) - dbc_interval = s->ctx_data.tx.dbc_interval; - else - dbc_interval = *data_blocks; + if (!(s->flags & CIP_DBC_IS_PAYLOAD_QUADLETS)) { + if (*data_blocks > 0 && s->ctx_data.tx.dbc_interval > 0) + dbc_interval = s->ctx_data.tx.dbc_interval; + else + dbc_interval = *data_blocks; + } else { + dbc_interval = payload_length / sizeof(__be32); + } lost = dbc != ((*data_block_counter + dbc_interval) & 0xff); } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 1f957c946c95..cf9ab347277f 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -37,6 +37,9 @@ * the value of current SYT_INTERVAL; e.g. initial value is not zero. * @CIP_UNAWARE_SYT: For outgoing packet, the value in SYT field of CIP is 0xffff. * For incoming packet, the value in SYT field of CIP is not handled. + * @CIP_DBC_IS_PAYLOAD_QUADLETS: Available for incoming packet, and only effective with + * CIP_DBC_IS_END_EVENT flag. The value of dbc field is the number of accumulated quadlets + * in CIP payload, instead of the number of accumulated data blocks. */ enum cip_flags { CIP_NONBLOCKING = 0x00, @@ -51,6 +54,7 @@ enum cip_flags { CIP_NO_HEADER = 0x100, CIP_UNALIGHED_DBC = 0x200, CIP_UNAWARE_SYT = 0x400, + CIP_DBC_IS_PAYLOAD_QUADLETS = 0x800, }; /** From baba35106855929782841b4126669b890f9cbdcb Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 19 Jan 2024 14:11:54 +0800 Subject: [PATCH 179/553] ext4: add a hint for block bitmap corrupt state in mb_groups [ Upstream commit 68ee261fb15457ecb17e3683cb4e6a4792ca5b71 ] If one group is marked as block bitmap corrupted, its free blocks cannot be used and its free count is also deducted from the global sbi->s_freeclusters_counter. User might be confused about the absent free space because we can't query the information about corrupted block groups except unreliable error messages in syslog. So add a hint to show block bitmap corrupted groups in mb_groups. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240119061154.1525781-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bc0ca45a5d81..a843f964332c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2905,7 +2905,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); - seq_puts(seq, " ]\n"); + seq_puts(seq, " ]"); + if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + seq_puts(seq, " Block bitmap corrupted!"); + seq_puts(seq, "\n"); return 0; } From aba664845af3dab8280338e1ed9751a58c9e5130 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 19 Jan 2024 14:29:08 +0800 Subject: [PATCH 180/553] ext4: forbid commit inconsistent quota data when errors=remount-ro [ Upstream commit d8b945fa475f13d787df00c26a6dc45a3e2e1d1d ] There's issue as follows When do IO fault injection test: Quota error (device dm-3): find_block_dqentry: Quota for id 101 referenced but not present Quota error (device dm-3): qtree_read_dquot: Can't read quota structure for id 101 Quota error (device dm-3): do_check_range: Getting block 2021161007 out of range 1-186 Quota error (device dm-3): qtree_read_dquot: Can't read quota structure for id 661 Now, ext4_write_dquot()/ext4_acquire_dquot()/ext4_release_dquot() may commit inconsistent quota data even if process failed. This may lead to filesystem corruption. To ensure filesystem consistent when errors=remount-ro there is need to call ext4_handle_error() to abort journal. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240119062908.3598806-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 601e097e1720..274542d869d0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6751,6 +6751,10 @@ static int ext4_write_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to commit dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6767,6 +6771,10 @@ static int ext4_acquire_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to acquire dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6786,6 +6794,10 @@ static int ext4_release_dquot(struct dquot *dquot) return PTR_ERR(handle); } ret = dquot_release(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to release dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; From 354a5d7bb740781079e79cab692a94dd9dd38f36 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 29 Aug 2019 11:53:52 -0400 Subject: [PATCH 181/553] drm/amd/display: Fix nanosec stat overflow [ Upstream commit 14d68acfd04b39f34eea7bea65dda652e6db5bf6 ] [Why] Nanosec stats can overflow on long running systems potentially causing statistic logging issues. [How] Use 64bit types for nanosec stats to ensure no overflow. Reviewed-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/modules/inc/mod_stats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h index 4220fd8fdd60..54cd86060f4d 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h @@ -57,10 +57,10 @@ void mod_stats_update_event(struct mod_stats *mod_stats, unsigned int length); void mod_stats_update_flip(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns); + unsigned long long timestamp_in_ns); void mod_stats_update_vupdate(struct mod_stats *mod_stats, - unsigned long timestamp_in_ns); + unsigned long long timestamp_in_ns); void mod_stats_update_freesync(struct mod_stats *mod_stats, unsigned int v_total_min, From 3a94feab045f51bdb3991a248f0a84f339f17a65 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 23 Feb 2024 17:08:16 +0530 Subject: [PATCH 182/553] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eb4f139888f636614dab3bcce97ff61cefc4b3a7 ] This ensures that the memory mapped by ioremap for adev->rmmio, is properly handled in amdgpu_device_init(). If the function exits early due to an error, the memory is unmapped. If the function completes successfully, the memory remains mapped. Reported by smatch: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 'adev->rmmio' from ioremap() not released on lines: 4035,4045,4051,4058,4068,4337 Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b11690a816e7..e4eb906806a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3713,8 +3713,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) - return -ENOMEM; + if (!adev->reset_domain) { + r = -ENOMEM; + goto unmap_memory; + } /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -3722,18 +3724,18 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return r; + goto unmap_memory; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - return r; + goto unmap_memory; /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) - return r; + goto unmap_memory; /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -3743,7 +3745,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) - return r; + goto unmap_memory; } /* enable PCIE atomic ops */ @@ -3999,6 +4001,8 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); +unmap_memory: + iounmap(adev->rmmio); return r; } From 43be051f35f934a2f348ea7d83de70acee578f05 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Tue, 30 Jan 2024 11:38:25 -0800 Subject: [PATCH 183/553] SUNRPC: increase size of rpc_wait_queue.qlen from unsigned short to unsigned int [ Upstream commit 2c35f43b5a4b9cdfaa6fdd946f5a212615dac8eb ] When the NFS client is under extreme load the rpc_wait_queue.qlen counter can be overflowed. Here is an instant of the backlog queue overflow in a real world environment shown by drgn helper: rpc_task_stats(rpc_clnt): ------------------------- rpc_clnt: 0xffff92b65d2bae00 rpc_xprt: 0xffff9275db64f000 Queue: sending[64887] pending[524] backlog[30441] binding[0] XMIT task: 0xffff925c6b1d8e98 WRITE: 750654 __dta_call_status_580: 65463 __dta_call_transmit_status_579: 1 call_reserveresult: 685189 nfs_client_init_is_complete: 1 COMMIT: 584 call_reserveresult: 573 __dta_call_status_580: 11 ACCESS: 1 __dta_call_status_580: 1 GETATTR: 10 __dta_call_status_580: 4 call_reserveresult: 6 751249 tasks for server 111.222.333.444 Total tasks: 751249 count_rpc_wait_queues(xprt): ---------------------------- **** rpc_xprt: 0xffff9275db64f000 num_reqs: 65511 wait_queue: xprt_binding[0] cnt: 0 wait_queue: xprt_binding[1] cnt: 0 wait_queue: xprt_binding[2] cnt: 0 wait_queue: xprt_binding[3] cnt: 0 rpc_wait_queue[xprt_binding].qlen: 0 maxpriority: 0 wait_queue: xprt_sending[0] cnt: 0 wait_queue: xprt_sending[1] cnt: 64887 wait_queue: xprt_sending[2] cnt: 0 wait_queue: xprt_sending[3] cnt: 0 rpc_wait_queue[xprt_sending].qlen: 64887 maxpriority: 3 wait_queue: xprt_pending[0] cnt: 524 wait_queue: xprt_pending[1] cnt: 0 wait_queue: xprt_pending[2] cnt: 0 wait_queue: xprt_pending[3] cnt: 0 rpc_wait_queue[xprt_pending].qlen: 524 maxpriority: 0 wait_queue: xprt_backlog[0] cnt: 0 wait_queue: xprt_backlog[1] cnt: 685801 wait_queue: xprt_backlog[2] cnt: 0 wait_queue: xprt_backlog[3] cnt: 0 rpc_wait_queue[xprt_backlog].qlen: 30441 maxpriority: 3 [task cnt mismatch] There is no effect on operations when this overflow occurs. However it causes confusion when trying to diagnose the performance problem. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/sunrpc/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8ada7dc802d3..8f9bee0e21c3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -186,7 +186,7 @@ struct rpc_wait_queue { unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; From aaefa79c1532cfe6bd3702a113ed3eba921094fe Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 28 Feb 2024 08:53:16 +0100 Subject: [PATCH 184/553] Revert "ACPI: PM: Block ASUS B1400CEAE from suspend to idle by default" [ Upstream commit cb98555fcd8eee98c30165537c7e394f3a66e809 ] This reverts commit d52848620de00cde4a3a5df908e231b8c8868250, which was originally put in place to work around a s2idle failure on this platform where the NVMe device was inaccessible upon resume. After extended testing, we found that the firmware's implementation of S3 is buggy and intermittently fails to wake up the system. We need to revert to s2idle mode. The NVMe issue has now been solved more precisely in the commit titled "PCI: Disable D3cold on Asus B1400 PCI-NVMe bridge" Link: https://bugzilla.kernel.org/show_bug.cgi?id=215742 Link: https://lore.kernel.org/r/20240228075316.7404-2-drake@endlessos.org Signed-off-by: Daniel Drake Signed-off-by: Bjorn Helgaas Acked-by: Jian-Hong Pan Acked-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/sleep.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 539c12fbd2f1..6026e20f022a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -385,18 +385,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, - /* - * ASUS B1400CEAE hangs on resume from suspend (see - * https://bugzilla.kernel.org/show_bug.cgi?id=215742). - */ - { - .callback = init_default_s3, - .ident = "ASUS B1400CEAE", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), - }, - }, {}, }; From b7d153bfba62f816c3bc0afa862b3e869f838d55 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 28 Feb 2024 23:07:57 -0800 Subject: [PATCH 185/553] libperf evlist: Avoid out-of-bounds access [ Upstream commit 1947b92464c3268381604bbe2ac977a3fd78192f ] Parallel testing appears to show a race between allocating and setting evsel ids. As there is a bounds check on the xyarray it yields a segv like: ``` AddressSanitizer:DEADLYSIGNAL ================================================================= ==484408==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000010 ==484408==The signal is caused by a WRITE memory access. ==484408==Hint: address points to the zero page. #0 0x55cef5d4eff4 in perf_evlist__id_hash tools/lib/perf/evlist.c:256 #1 0x55cef5d4f132 in perf_evlist__id_add tools/lib/perf/evlist.c:274 #2 0x55cef5d4f545 in perf_evlist__id_add_fd tools/lib/perf/evlist.c:315 #3 0x55cef5a1923f in store_evsel_ids util/evsel.c:3130 #4 0x55cef5a19400 in evsel__store_ids util/evsel.c:3147 #5 0x55cef5888204 in __run_perf_stat tools/perf/builtin-stat.c:832 #6 0x55cef5888c06 in run_perf_stat tools/perf/builtin-stat.c:960 #7 0x55cef58932db in cmd_stat tools/perf/builtin-stat.c:2878 ... ``` Avoid this crash by early exiting the perf_evlist__id_add_fd and perf_evlist__id_add is the access is out-of-bounds. Signed-off-by: Ian Rogers Cc: Yang Jihong Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240229070757.796244-1-irogers@google.com Signed-off-by: Sasha Levin --- tools/lib/perf/evlist.c | 18 ++++++++++++------ tools/lib/perf/include/internal/evlist.h | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 61b637f29b82..b871923c7e5c 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -233,10 +233,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); + struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread); sid->id = id; sid->evsel = evsel; @@ -254,21 +254,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist) void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + if (!SID(evsel, cpu_map_idx, thread)) + return; + + perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id); evsel->id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) + int cpu_map_idx, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ u64 id; int ret; + if (!SID(evsel, cpu_map_idx, thread)) + return -1; + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); if (!ret) goto add; @@ -297,7 +303,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id); return 0; } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 850f07070036..cf77db75291b 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -127,11 +127,11 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist); void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id); + int cpu_map_idx, int thread, u64 id); int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd); + int cpu_map_idx, int thread, int fd); void perf_evlist__reset_id_hash(struct perf_evlist *evlist); From c87e811cae7e1865046f64ce130e58f2af0a72c0 Mon Sep 17 00:00:00 2001 From: Markuss Broks Date: Fri, 1 Mar 2024 17:41:00 +0100 Subject: [PATCH 186/553] input/touchscreen: imagis: Correct the maximum touch area value [ Upstream commit 54a62ed17a705ef1ac80ebca2b62136b19243e19 ] As specified in downstream IST3038B driver and proved by testing, the correct maximum reported value of touch area is 16. Signed-off-by: Markuss Broks Signed-off-by: Karel Balej Link: https://lore.kernel.org/r/20240301164659.13240-2-karelb@gimli.ms.mff.cuni.cz Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/imagis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c index e2697e6c6d2a..b667914a44f1 100644 --- a/drivers/input/touchscreen/imagis.c +++ b/drivers/input/touchscreen/imagis.c @@ -210,7 +210,7 @@ static int imagis_init_input_dev(struct imagis_ts *ts) input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X); input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y); - input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 16, 0, 0); touchscreen_parse_properties(input_dev, true, &ts->prop); if (!ts->prop.max_x || !ts->prop.max_y) { From 512a01da7134bac8f8b373506011e8aaa3283854 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Tue, 5 Mar 2024 16:45:09 +0300 Subject: [PATCH 187/553] block: prevent division by zero in blk_rq_stat_sum() [ Upstream commit 93f52fbeaf4b676b21acfe42a5152620e6770d02 ] The expression dst->nr_samples + src->nr_samples may have zero value on overflow. It is necessary to add a check to avoid division by zero. Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240305134509.23108-1-r.smirnov@omp.ru Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-stat.c b/block/blk-stat.c index da9407b7d4ab..41be89ecaf20 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -28,7 +28,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat) /* src is a per-cpu stat, mean isn't initialized */ void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { - if (!src->nr_samples) + if (dst->nr_samples + src->nr_samples <= dst->nr_samples) return; dst->min = min(dst->min, src->min); From 4097b1f10f044e97f71b960312d9894de0d0929b Mon Sep 17 00:00:00 2001 From: Manjunath Patil Date: Fri, 8 Mar 2024 22:33:23 -0800 Subject: [PATCH 188/553] RDMA/cm: add timeout to cm_destroy_id wait [ Upstream commit 96d9cbe2f2ff7abde021bac75eafaceabe9a51fa ] Add timeout to cm_destroy_id, so that userspace can trigger any data collection that would help in analyzing the cause of delay in destroying the cm_id. New noinline function helps dtrace/ebpf programs to hook on to it. Existing functionality isn't changed except triggering a probe-able new function at every timeout interval. We have seen cases where CM messages stuck with MAD layer (either due to software bug or faulty HCA), leading to cm_id getting stuck in the following call stack. This patch helps in resolving such issues faster. kernel: ... INFO: task XXXX:56778 blocked for more than 120 seconds. ... Call Trace: __schedule+0x2bc/0x895 schedule+0x36/0x7c schedule_timeout+0x1f6/0x31f ? __slab_free+0x19c/0x2ba wait_for_completion+0x12b/0x18a ? wake_up_q+0x80/0x73 cm_destroy_id+0x345/0x610 [ib_cm] ib_destroy_cm_id+0x10/0x20 [ib_cm] rdma_destroy_id+0xa8/0x300 [rdma_cm] ucma_destroy_id+0x13e/0x190 [rdma_ucm] ucma_write+0xe0/0x160 [rdma_ucm] __vfs_write+0x3a/0x16d vfs_write+0xb2/0x1a1 ? syscall_trace_enter+0x1ce/0x2b8 SyS_write+0x5c/0xd3 do_syscall_64+0x79/0x1b9 entry_SYSCALL_64_after_hwframe+0x16d/0x0 Signed-off-by: Manjunath Patil Link: https://lore.kernel.org/r/20240309063323.458102-1-manjunath.b.patil@oracle.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b7f902344289..462a10d6a576 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -34,6 +34,7 @@ MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); +#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */ static const char * const ibcm_rej_reason_strs[] = { [IB_CM_REJ_NO_QP] = "no QP", [IB_CM_REJ_NO_EEC] = "no EEC", @@ -1025,10 +1026,20 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__, + cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount)); +} + static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; + int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); @@ -1135,7 +1146,14 @@ retest: xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id)); cm_deref_id(cm_id_priv); - wait_for_completion(&cm_id_priv->comp); + do { + ret = wait_for_completion_timeout(&cm_id_priv->comp, + msecs_to_jiffies( + CM_DESTROY_ID_WAIT_TIMEOUT)); + if (!ret) /* timeout happened */ + cm_destroy_id_wait_timeout(cm_id); + } while (!ret); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); From 055e406d7b0eed9ab874fb2e62ab8141ffb9bf38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Sat, 9 Mar 2024 21:18:05 -0800 Subject: [PATCH 189/553] Input: imagis - use FIELD_GET where applicable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0ca3dbd03d66c6b9e044f48720e6ab5cef37ae5 ] Instead of manually extracting certain bits from registers with binary ANDs and shifts, the FIELD_GET macro can be used. With this in mind, the *_SHIFT macros can be dropped. Signed-off-by: Duje Mihanović Link: https://lore.kernel.org/r/20240306-b4-imagis-keys-v3-1-2c429afa8420@skole.hr Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/imagis.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c index b667914a44f1..2636e1c9435d 100644 --- a/drivers/input/touchscreen/imagis.c +++ b/drivers/input/touchscreen/imagis.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -23,12 +24,9 @@ #define IST3038C_I2C_RETRY_COUNT 3 #define IST3038C_MAX_FINGER_NUM 10 #define IST3038C_X_MASK GENMASK(23, 12) -#define IST3038C_X_SHIFT 12 #define IST3038C_Y_MASK GENMASK(11, 0) #define IST3038C_AREA_MASK GENMASK(27, 24) -#define IST3038C_AREA_SHIFT 24 #define IST3038C_FINGER_COUNT_MASK GENMASK(15, 12) -#define IST3038C_FINGER_COUNT_SHIFT 12 #define IST3038C_FINGER_STATUS_MASK GENMASK(9, 0) struct imagis_ts { @@ -92,8 +90,7 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id) goto out; } - finger_count = (intr_message & IST3038C_FINGER_COUNT_MASK) >> - IST3038C_FINGER_COUNT_SHIFT; + finger_count = FIELD_GET(IST3038C_FINGER_COUNT_MASK, intr_message); if (finger_count > IST3038C_MAX_FINGER_NUM) { dev_err(&ts->client->dev, "finger count %d is more than maximum supported\n", @@ -101,7 +98,7 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id) goto out; } - finger_pressed = intr_message & IST3038C_FINGER_STATUS_MASK; + finger_pressed = FIELD_GET(IST3038C_FINGER_STATUS_MASK, intr_message); for (i = 0; i < finger_count; i++) { error = imagis_i2c_read_reg(ts, @@ -118,12 +115,11 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id) input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, finger_pressed & BIT(i)); touchscreen_report_pos(ts->input_dev, &ts->prop, - (finger_status & IST3038C_X_MASK) >> - IST3038C_X_SHIFT, - finger_status & IST3038C_Y_MASK, 1); + FIELD_GET(IST3038C_X_MASK, finger_status), + FIELD_GET(IST3038C_Y_MASK, finger_status), + true); input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, - (finger_status & IST3038C_AREA_MASK) >> - IST3038C_AREA_SHIFT); + FIELD_GET(IST3038C_AREA_MASK, finger_status)); } input_mt_sync_frame(ts->input_dev); From 70310e55b52922afa4d9dfa4d60ba35602828455 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Sun, 10 Mar 2024 12:31:41 +0100 Subject: [PATCH 190/553] Input: allocate keycode for Display refresh rate toggle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cfeb98b95fff25c442f78a6f616c627bc48a26b7 ] Newer Lenovo Yogas and Legions with 60Hz/90Hz displays send a wmi event when Fn + R is pressed. This is intended for use to switch between the two refresh rates. Allocate a new KEY_REFRESH_RATE_TOGGLE keycode for it. Signed-off-by: Gergo Koteles Acked-by: Dmitry Torokhov Link: https://lore.kernel.org/r/15a5d08c84cf4d7b820de34ebbcf8ae2502fb3ca.1710065750.git.soyer@irl.hu Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 7ad931a32970..1ce8a91349e9 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ From 44479c7fefc825f91b43ae024372e0511b105328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alban=20Boy=C3=A9?= Date: Tue, 27 Feb 2024 22:40:17 +0000 Subject: [PATCH 191/553] platform/x86: touchscreen_dmi: Add an extra entry for a variant of the Chuwi Vi8 tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1266e2efb7512dbf20eac820ca2ed34de6b1c3e7 ] Signed-off-by: Alban Boyé Link: https://lore.kernel.org/r/20240227223919.11587-1-alban.boye@protonmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 11d72a353355..399b97b54dd0 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -1177,6 +1177,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "CHUWI.D86JLBNR"), }, }, + { + /* Chuwi Vi8 dual-boot (CWI506) */ + .driver_data = (void *)&chuwi_vi8_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "i86"), + DMI_MATCH(DMI_BIOS_VERSION, "CHUWI2.D86JHBNR02"), + }, + }, { /* Chuwi Vi8 Plus (CWI519) */ .driver_data = (void *)&chuwi_vi8_plus_data, From 39da708cb201d899fa4027af30f59987c2244624 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 29 Jan 2024 16:36:25 +0530 Subject: [PATCH 192/553] perf/x86/amd/lbr: Discard erroneous branch entries [ Upstream commit 29297ffffb0bf388778bd4b581a43cee6929ae65 ] The Revision Guide for AMD Family 19h Model 10-1Fh processors declares Erratum 1452 which states that non-branch entries may erroneously be recorded in the Last Branch Record (LBR) stack with the valid and spec bits set. Such entries can be recognized by inspecting bit 61 of the corresponding LastBranchStackToIp register. This bit is currently reserved but if found to be set, the associated branch entry should be discarded. Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://bugzilla.kernel.org/attachment.cgi?id=305518 Link: https://lore.kernel.org/r/3ad2aa305f7396d41a40e3f054f740d464b16b7f.1706526029.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/lbr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index b8fe74e8e0a6..48f4095f500d 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -173,9 +173,11 @@ void amd_pmu_lbr_read(void) /* * Check if a branch has been logged; if valid = 0, spec = 0 - * then no branch was recorded + * then no branch was recorded; if reserved = 1 then an + * erroneous branch was recorded (see Erratum 1452) */ - if (!entry.to.split.valid && !entry.to.split.spec) + if ((!entry.to.split.valid && !entry.to.split.spec) || + entry.to.split.reserved) continue; perf_clear_branch_entry_bitfields(br + out); From 5e7da5bb2d34b1f83d38fb69c5d044130a1136a0 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Fri, 15 Mar 2024 12:28:08 -0300 Subject: [PATCH 193/553] ktest: force $buildonly = 1 for 'make_warnings_file' test type [ Upstream commit 07283c1873a4d0eaa0e822536881bfdaea853910 ] The test type "make_warnings_file" should have no mandatory configuration parameters other than the ones required by the "build" test type, because its purpose is to create a file with build warnings that may or may not be used by other subsequent tests. Currently, the only way to use it as a stand-alone test is by setting POWER_CYCLE, CONSOLE, SSH_USER, BUILD_TARGET, TARGET_IMAGE, REBOOT_TYPE and GRUB_MENU. Link: https://lkml.kernel.org/r/20240315-ktest-v2-1-c5c20a75f6a3@marliere.net Cc: John Hawley Signed-off-by: Ricardo B. Marliere Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- tools/testing/ktest/ktest.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index e6c381498e63..449e45bd6966 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -836,6 +836,7 @@ sub set_value { if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ && $prvalue !~ /^(config_|)bisect$/ && $prvalue !~ /^build$/ && + $prvalue !~ /^make_warnings_file$/ && $buildonly) { # Note if a test is something other than build, then we From 1e9f5619d90866a6285139fc19291a93289a40cd Mon Sep 17 00:00:00 2001 From: linke li Date: Sat, 2 Mar 2024 12:42:21 +0800 Subject: [PATCH 194/553] ring-buffer: use READ_ONCE() to read cpu_buffer->commit_page in concurrent environment [ Upstream commit f1e30cb6369251c03f63c564006f96a54197dcc4 ] In function ring_buffer_iter_empty(), cpu_buffer->commit_page is read while other threads may change it. It may cause the time_stamp that read in the next line come from a different page. Use READ_ONCE() to avoid having to reason about compiler optimizations now and in future. Link: https://lore.kernel.org/linux-trace-kernel/tencent_DFF7D3561A0686B5E8FC079150A02505180A@qq.com Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Signed-off-by: linke li Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 431a922e5c89..d2947de3021a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4431,7 +4431,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter) cpu_buffer = iter->cpu_buffer; reader = cpu_buffer->reader_page; head_page = cpu_buffer->head_page; - commit_page = cpu_buffer->commit_page; + commit_page = READ_ONCE(cpu_buffer->commit_page); commit_ts = commit_page->page->time_stamp; /* From 63eaa43d5d5fd00777d5b849ada385a7047ae589 Mon Sep 17 00:00:00 2001 From: Petre Rodan Date: Mon, 8 Jan 2024 12:32:20 +0200 Subject: [PATCH 195/553] tools: iio: replace seekdir() in iio_generic_buffer [ Upstream commit 4e6500bfa053dc133021f9c144261b77b0ba7dc8 ] Replace seekdir() with rewinddir() in order to fix a localized glibc bug. One of the glibc patches that stable Gentoo is using causes an improper directory stream positioning bug on 32bit arm. That in turn ends up as a floating point exception in iio_generic_buffer. The attached patch provides a fix by using an equivalent function which should not cause trouble for other distros and is easier to reason about in general as it obviously always goes back to to the start. https://sourceware.org/bugzilla/show_bug.cgi?id=31212 Signed-off-by: Petre Rodan Link: https://lore.kernel.org/r/20240108103224.3986-1-petre.rodan@subdimension.ro Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- tools/iio/iio_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index 6a00a6eecaef..c5c5082cb24e 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -376,7 +376,7 @@ int build_channel_array(const char *device_dir, int buffer_idx, goto error_close_dir; } - seekdir(dp, 0); + rewinddir(dp); while (ent = readdir(dp), ent) { if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), "_en") == 0) { From 7dbf082988d63b4d09c0fc90f616556eafa5448d Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 12 Jan 2024 11:08:00 -0700 Subject: [PATCH 196/553] bus: mhi: host: Add MHI_PM_SYS_ERR_FAIL state [ Upstream commit bce3f770684cc1d91ff9edab431b71ac991faf29 ] When processing a SYSERR, if the device does not respond to the MHI_RESET from the host, the host will be stuck in a difficult to recover state. The host will remain in MHI_PM_SYS_ERR_PROCESS and not clean up the host channels. Clients will not be notified of the SYSERR via the destruction of their channel devices, which means clients may think that the device is still up. Subsequent SYSERR events such as a device fatal error will not be processed as the state machine cannot transition from PROCESS back to DETECT. The only way to recover from this is to unload the mhi module (wipe the state machine state) or for the mhi controller to initiate SHUTDOWN. This issue was discovered by stress testing soc_reset events on AIC100 via the sysfs node. soc_reset is processed entirely in hardware. When the register write hits the endpoint hardware, it causes the soc to reset without firmware involvement. In stress testing, there is a rare race where soc_reset N will cause the soc to reset and PBL to signal SYSERR (fatal error). If soc_reset N+1 is triggered before PBL can process the MHI_RESET from the host, then the soc will reset again, and re-run PBL from the beginning. This will cause PBL to lose all state. PBL will be waiting for the host to respond to the new syserr, but host will be stuck expecting the previous MHI_RESET to be processed. Additionally, the AMSS EE firmware (QSM) was hacked to synthetically reproduce the issue by simulating a FW hang after the QSM issued a SYSERR. In this case, soc_reset would not recover the device. For this failure case, to recover the device, we need a state similar to PROCESS, but can transition to DETECT. There is not a viable existing state to use. POR has the needed transitions, but assumes the device is in a good state and could allow the host to attempt to use the device. Allowing PROCESS to transition to DETECT invites the possibility of parallel SYSERR processing which could get the host and device out of sync. Thus, invent a new state - MHI_PM_SYS_ERR_FAIL This essentially a holding state. It allows us to clean up the host elements that are based on the old state of the device (channels), but does not allow us to directly advance back to an operational state. It does allow the detection and processing of another SYSERR which may recover the device, or allows the controller to do a clean shutdown. Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240112180800.536733-1-quic_jhugo@quicinc.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- drivers/bus/mhi/host/init.c | 1 + drivers/bus/mhi/host/internal.h | 9 ++++++--- drivers/bus/mhi/host/pm.c | 20 +++++++++++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c index 04fbccff65ac..60c1df048fa2 100644 --- a/drivers/bus/mhi/host/init.c +++ b/drivers/bus/mhi/host/init.c @@ -62,6 +62,7 @@ static const char * const mhi_pm_state_str[] = { [MHI_PM_STATE_FW_DL_ERR] = "Firmware Download Error", [MHI_PM_STATE_SYS_ERR_DETECT] = "SYS ERROR Detect", [MHI_PM_STATE_SYS_ERR_PROCESS] = "SYS ERROR Process", + [MHI_PM_STATE_SYS_ERR_FAIL] = "SYS ERROR Failure", [MHI_PM_STATE_SHUTDOWN_PROCESS] = "SHUTDOWN Process", [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect", }; diff --git a/drivers/bus/mhi/host/internal.h b/drivers/bus/mhi/host/internal.h index 01fd10a399b6..6abf09da4f61 100644 --- a/drivers/bus/mhi/host/internal.h +++ b/drivers/bus/mhi/host/internal.h @@ -88,6 +88,7 @@ enum mhi_pm_state { MHI_PM_STATE_FW_DL_ERR, MHI_PM_STATE_SYS_ERR_DETECT, MHI_PM_STATE_SYS_ERR_PROCESS, + MHI_PM_STATE_SYS_ERR_FAIL, MHI_PM_STATE_SHUTDOWN_PROCESS, MHI_PM_STATE_LD_ERR_FATAL_DETECT, MHI_PM_STATE_MAX @@ -104,14 +105,16 @@ enum mhi_pm_state { #define MHI_PM_FW_DL_ERR BIT(7) #define MHI_PM_SYS_ERR_DETECT BIT(8) #define MHI_PM_SYS_ERR_PROCESS BIT(9) -#define MHI_PM_SHUTDOWN_PROCESS BIT(10) +#define MHI_PM_SYS_ERR_FAIL BIT(10) +#define MHI_PM_SHUTDOWN_PROCESS BIT(11) /* link not accessible */ -#define MHI_PM_LD_ERR_FATAL_DETECT BIT(11) +#define MHI_PM_LD_ERR_FATAL_DETECT BIT(12) #define MHI_REG_ACCESS_VALID(pm_state) ((pm_state & (MHI_PM_POR | MHI_PM_M0 | \ MHI_PM_M2 | MHI_PM_M3_ENTER | MHI_PM_M3_EXIT | \ MHI_PM_SYS_ERR_DETECT | MHI_PM_SYS_ERR_PROCESS | \ - MHI_PM_SHUTDOWN_PROCESS | MHI_PM_FW_DL_ERR))) + MHI_PM_SYS_ERR_FAIL | MHI_PM_SHUTDOWN_PROCESS | \ + MHI_PM_FW_DL_ERR))) #define MHI_PM_IN_ERROR_STATE(pm_state) (pm_state >= MHI_PM_FW_DL_ERR) #define MHI_PM_IN_FATAL_STATE(pm_state) (pm_state == MHI_PM_LD_ERR_FATAL_DETECT) #define MHI_DB_ACCESS_VALID(mhi_cntrl) (mhi_cntrl->pm_state & mhi_cntrl->db_access) diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c index 8a4362d75fc4..27f8a40f288c 100644 --- a/drivers/bus/mhi/host/pm.c +++ b/drivers/bus/mhi/host/pm.c @@ -36,7 +36,10 @@ * M0 <--> M0 * M0 -> FW_DL_ERR * M0 -> M3_ENTER -> M3 -> M3_EXIT --> M0 - * L1: SYS_ERR_DETECT -> SYS_ERR_PROCESS --> POR + * L1: SYS_ERR_DETECT -> SYS_ERR_PROCESS + * SYS_ERR_PROCESS -> SYS_ERR_FAIL + * SYS_ERR_FAIL -> SYS_ERR_DETECT + * SYS_ERR_PROCESS --> POR * L2: SHUTDOWN_PROCESS -> LD_ERR_FATAL_DETECT * SHUTDOWN_PROCESS -> DISABLE * L3: LD_ERR_FATAL_DETECT <--> LD_ERR_FATAL_DETECT @@ -93,7 +96,12 @@ static const struct mhi_pm_transitions dev_state_transitions[] = { }, { MHI_PM_SYS_ERR_PROCESS, - MHI_PM_POR | MHI_PM_SHUTDOWN_PROCESS | + MHI_PM_POR | MHI_PM_SYS_ERR_FAIL | MHI_PM_SHUTDOWN_PROCESS | + MHI_PM_LD_ERR_FATAL_DETECT + }, + { + MHI_PM_SYS_ERR_FAIL, + MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS | MHI_PM_LD_ERR_FATAL_DETECT }, /* L2 States */ @@ -624,7 +632,13 @@ static void mhi_pm_sys_error_transition(struct mhi_controller *mhi_cntrl) !in_reset, timeout); if (!ret || in_reset) { dev_err(dev, "Device failed to exit MHI Reset state\n"); - goto exit_sys_error_transition; + write_lock_irq(&mhi_cntrl->pm_lock); + cur_state = mhi_tryset_pm_state(mhi_cntrl, + MHI_PM_SYS_ERR_FAIL); + write_unlock_irq(&mhi_cntrl->pm_lock); + /* Shutdown may have occurred, otherwise cleanup now */ + if (cur_state != MHI_PM_SYS_ERR_FAIL) + goto exit_sys_error_transition; } /* From 893b2562613f8e0b8141e15aa2f3791ebdcbd1e8 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 14 Feb 2024 00:37:55 +0100 Subject: [PATCH 197/553] usb: gadget: uvc: mark incomplete frames with UVC_STREAM_ERR [ Upstream commit 2a3b7af120477d0571b815ccb8600cafd5ebf02f ] If an frame was transmitted incomplete to the host, we set the UVC_STREAM_ERR bit in the header for the last request that is going to be queued. This way the host will know that it should drop the frame instead of trying to display the corrupted content. Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20240214-uvc-error-tag-v1-2-37659a3877fe@pengutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/uvc_video.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index e81865978299..be48d5ab17c7 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -35,6 +35,9 @@ uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf, data[1] = UVC_STREAM_EOH | video->fid; + if (video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) + data[1] |= UVC_STREAM_ERR; + if (video->queue.buf_used == 0 && ts.tv_sec) { /* dwClockFrequency is 48 MHz */ u32 pts = ((u64)ts.tv_sec * USEC_PER_SEC + ts.tv_nsec / NSEC_PER_USEC) * 48; From f692c547e3f2d5644499f9aca9dd4696306e72c0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 26 Jan 2024 15:55:55 +0200 Subject: [PATCH 198/553] thunderbolt: Keep the domain powered when USB4 port is in redrive mode [ Upstream commit a75e0684efe567ae5f6a8e91a8360c4c1773cf3a ] If a DiplayPort cable is directly connected to the host routers USB4 port, there is no tunnel involved but the port is in "redrive" mode meaning that it is re-driving the DisplayPort signals from its DisplayPort source. In this case we need to keep the domain powered on otherwise once the domain enters D3cold the connected monitor blanks too. Since this happens only on Intel Barlow Ridge add a quirk that takes runtime PM reference if we detect that the USB4 port entered redrive mode (and release it once it exits the mode). Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 14 +++++++++++ drivers/thunderbolt/tb.c | 49 +++++++++++++++++++++++++++++++++++- drivers/thunderbolt/tb.h | 4 +++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 4ab3803e10c8..638cb5fb22c1 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -42,6 +42,12 @@ static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) } } +static void quirk_block_rpm_in_redrive(struct tb_switch *sw) +{ + sw->quirks |= QUIRK_KEEP_POWER_IN_DP_REDRIVE; + tb_sw_dbg(sw, "preventing runtime PM in DP redrive mode\n"); +} + struct tb_quirk { u16 hw_vendor_id; u16 hw_device_id; @@ -85,6 +91,14 @@ static const struct tb_quirk tb_quirks[] = { quirk_usb3_maximum_bandwidth }, { 0x8087, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HUB_40G_BRIDGE, 0x0000, 0x0000, quirk_usb3_maximum_bandwidth }, + /* + * Block Runtime PM in DP redrive mode for Intel Barlow Ridge host + * controllers. + */ + { 0x8087, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI, 0x0000, 0x0000, + quirk_block_rpm_in_redrive }, + { 0x8087, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI, 0x0000, 0x0000, + quirk_block_rpm_in_redrive }, /* * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. */ diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index e1eb092ad1d6..e83269dc2b06 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1050,6 +1050,49 @@ err_rpm_put: pm_runtime_put_autosuspend(&in->sw->dev); } +static void tb_enter_redrive(struct tb_port *port) +{ + struct tb_switch *sw = port->sw; + + if (!(sw->quirks & QUIRK_KEEP_POWER_IN_DP_REDRIVE)) + return; + + /* + * If we get hot-unplug for the DP IN port of the host router + * and the DP resource is not available anymore it means there + * is a monitor connected directly to the Type-C port and we are + * in "redrive" mode. For this to work we cannot enter RTD3 so + * we bump up the runtime PM reference count here. + */ + if (!tb_port_is_dpin(port)) + return; + if (tb_route(sw)) + return; + if (!tb_switch_query_dp_resource(sw, port)) { + port->redrive = true; + pm_runtime_get(&sw->dev); + tb_port_dbg(port, "enter redrive mode, keeping powered\n"); + } +} + +static void tb_exit_redrive(struct tb_port *port) +{ + struct tb_switch *sw = port->sw; + + if (!(sw->quirks & QUIRK_KEEP_POWER_IN_DP_REDRIVE)) + return; + + if (!tb_port_is_dpin(port)) + return; + if (tb_route(sw)) + return; + if (port->redrive && tb_switch_query_dp_resource(sw, port)) { + port->redrive = false; + pm_runtime_put(&sw->dev); + tb_port_dbg(port, "exit redrive mode\n"); + } +} + static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port) { struct tb_port *in, *out; @@ -1066,7 +1109,10 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port) } tunnel = tb_find_tunnel(tb, TB_TUNNEL_DP, in, out); - tb_deactivate_and_free_tunnel(tunnel); + if (tunnel) + tb_deactivate_and_free_tunnel(tunnel); + else + tb_enter_redrive(port); list_del_init(&port->list); /* @@ -1092,6 +1138,7 @@ static void tb_dp_resource_available(struct tb *tb, struct tb_port *port) tb_port_dbg(port, "DP %s resource available\n", tb_port_is_dpin(port) ? "IN" : "OUT"); list_add_tail(&port->list, &tcm->dp_resources); + tb_exit_redrive(port); /* Look for suitable DP IN <-> DP OUT pairs now */ tb_tunnel_dp(tb); diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index f79cae48a8ea..b3fec5f8e20c 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -27,6 +27,8 @@ #define QUIRK_FORCE_POWER_LINK_CONTROLLER BIT(0) /* Disable CLx if not supported */ #define QUIRK_NO_CLX BIT(1) +/* Need to keep power on while USB4 port is in redrive mode */ +#define QUIRK_KEEP_POWER_IN_DP_REDRIVE BIT(2) /** * struct tb_nvm - Structure holding NVM information @@ -254,6 +256,7 @@ struct tb_switch { * DMA paths through this port. * @max_bw: Maximum possible bandwidth through this adapter if set to * non-zero. + * @redrive: For DP IN, if true the adapter is in redrive mode. * * In USB4 terminology this structure represents an adapter (protocol or * lane adapter). @@ -280,6 +283,7 @@ struct tb_port { unsigned int ctl_credits; unsigned int dma_credits; unsigned int max_bw; + bool redrive; }; /** From 57e6634e591b6e51cbb59957d24630af18b3869f Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Thu, 22 Feb 2024 22:09:01 +0100 Subject: [PATCH 199/553] usb: typec: tcpci: add generic tcpci fallback compatible [ Upstream commit 8774ea7a553e2aec323170d49365b59af0a2b7e0 ] The driver already support the tcpci binding for the i2c_device_id so add the support for the of_device_id too. Signed-off-by: Marco Felsch Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240222210903.208901-3-m.felsch@pengutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/tcpm/tcpci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 816945913ed0..f649769912e5 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -875,6 +875,7 @@ MODULE_DEVICE_TABLE(i2c, tcpci_id); #ifdef CONFIG_OF static const struct of_device_id tcpci_of_match[] = { { .compatible = "nxp,ptn5110", }, + { .compatible = "tcpci", }, {}, }; MODULE_DEVICE_TABLE(of, tcpci_of_match); From 409289d0a15166671dfa3f452ee87a00db8f08db Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Mar 2024 11:13:51 +0000 Subject: [PATCH 200/553] usb: sl811-hcd: only defined function checkdone if QUIRK2 is defined [ Upstream commit 12f371e2b6cb4b79c788f1f073992e115f4ca918 ] Function checkdone is only required if QUIRK2 is defined, so add appropriate #if / #endif around the function. Cleans up clang scan build warning: drivers/usb/host/sl811-hcd.c:588:18: warning: unused function 'checkdone' [-Wunused-function] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240307111351.1982382-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/sl811-hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index b8b90eec9107..48478eb71211 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -585,6 +585,7 @@ done(struct sl811 *sl811, struct sl811h_ep *ep, u8 bank) finish_request(sl811, ep, urb, urbstat); } +#ifdef QUIRK2 static inline u8 checkdone(struct sl811 *sl811) { u8 ctl; @@ -616,6 +617,7 @@ static inline u8 checkdone(struct sl811 *sl811) #endif return irqstat; } +#endif static irqreturn_t sl811h_irq(struct usb_hcd *hcd) { From 103c0f946f9a173b40ec94788480fb667c153304 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 25 Jan 2024 13:11:16 +0100 Subject: [PATCH 201/553] thermal/of: Assume polling-delay(-passive) 0 when absent [ Upstream commit 488164006a281986d95abbc4b26e340c19c4c85b ] Currently, thermal zones associated with providers that have interrupts for signaling hot/critical trips are required to set a polling-delay of 0 to indicate no polling. This feels a bit backwards. Change the code such that "no polling delay" also means "no polling". Suggested-by: Bjorn Andersson Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20240125-topic-thermal-v1-2-3c9d4dced138@linaro.org Signed-off-by: Sasha Levin --- drivers/thermal/thermal_of.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 4104743dbc17..202dce0d2e30 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -337,14 +337,18 @@ static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdel int ret; ret = of_property_read_u32(np, "polling-delay-passive", pdelay); - if (ret < 0) { - pr_err("%pOFn: missing polling-delay-passive property\n", np); + if (ret == -EINVAL) { + *pdelay = 0; + } else if (ret < 0) { + pr_err("%pOFn: Couldn't get polling-delay-passive: %d\n", np, ret); return ret; } ret = of_property_read_u32(np, "polling-delay", delay); - if (ret < 0) { - pr_err("%pOFn: missing polling-delay property\n", np); + if (ret == -EINVAL) { + *delay = 0; + } else if (ret < 0) { + pr_err("%pOFn: Couldn't get polling-delay: %d\n", np, ret); return ret; } From 8b849265dad8fcc5e1b5945a0aca088ff2791852 Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Tue, 5 Mar 2024 15:56:06 +0900 Subject: [PATCH 202/553] ASoC: soc-core.c: Skip dummy codec when adding platforms [ Upstream commit 23fb6bc2696119391ec3a92ccaffe50e567c515e ] When pcm_runtime is adding platform components it will scan all registered components. In case of DPCM FE/BE some DAI links will configure dummy platform. However both dummy codec and dummy platform are using "snd-soc-dummy" as component->name. Dummy codec should be skipped when adding platforms otherwise there'll be overflow and UBSAN complains. Reported-by: Zhipeng Wang Signed-off-by: Chancel Liu Link: https://msgid.link/r/20240305065606.3778642-1-chancel.liu@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a409fbed8f34..6a4101dc15a5 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1020,6 +1020,9 @@ int snd_soc_add_pcm_runtime(struct snd_soc_card *card, if (!snd_soc_is_matching_component(platform, component)) continue; + if (snd_soc_component_is_dummy(component) && component->num_dai) + continue; + snd_soc_rtd_add_component(rtd, component); } } From f8a7b7b085df99dc870df525d6806f035cc91ad1 Mon Sep 17 00:00:00 2001 From: Aleksandr Burakov Date: Fri, 1 Mar 2024 14:35:43 +0300 Subject: [PATCH 203/553] fbdev: viafb: fix typo in hw_bitblt_1 and hw_bitblt_2 [ Upstream commit bc87bb342f106a0402186bcb588fcbe945dced4b ] There are some actions with value 'tmp' but 'dst_addr' is checked instead. It is obvious that a copy-paste error was made here and the value of variable 'tmp' should be checked here. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Aleksandr Burakov Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/via/accel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/via/accel.c b/drivers/video/fbdev/via/accel.c index 0a1bc7a4d785..1e04026f0809 100644 --- a/drivers/video/fbdev/via/accel.c +++ b/drivers/video/fbdev/via/accel.c @@ -115,7 +115,7 @@ static int hw_bitblt_1(void __iomem *engine, u8 op, u32 width, u32 height, if (op != VIA_BITBLT_FILL) { tmp = src_mem ? 0 : src_addr; - if (dst_addr & 0xE0000007) { + if (tmp & 0xE0000007) { printk(KERN_WARNING "hw_bitblt_1: Unsupported source " "address %X\n", tmp); return -EINVAL; @@ -260,7 +260,7 @@ static int hw_bitblt_2(void __iomem *engine, u8 op, u32 width, u32 height, writel(tmp, engine + 0x18); tmp = src_mem ? 0 : src_addr; - if (dst_addr & 0xE0000007) { + if (tmp & 0xE0000007) { printk(KERN_WARNING "hw_bitblt_2: Unsupported source " "address %X\n", tmp); return -EINVAL; From 5245a6da27ef79f8dba98dad5542ebe56d311837 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Mar 2024 09:51:40 -0600 Subject: [PATCH 204/553] io_uring: clear opcode specific data for an early failure [ Upstream commit e21e1c45e1fe2e31732f40256b49c04e76a17cee ] If failure happens before the opcode prep handler is called, ensure that we clear the opcode specific area of the request, which holds data specific to that request type. This prevents errors where opcode handlers either don't get to clear per-request private data since prep isn't even called. Reported-and-tested-by: syzbot+f8e9a371388aa62ecab4@syzkaller.appspotmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 415248c1f82c..68f1b6f8699a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1978,6 +1978,13 @@ static void io_init_req_drain(struct io_kiocb *req) } } +static __cold int io_init_fail_req(struct io_kiocb *req, int err) +{ + /* ensure per-opcode data is cleared if we fail before prep */ + memset(&req->cmd.data, 0, sizeof(req->cmd.data)); + return err; +} + static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_uring_sqe *sqe) __must_hold(&ctx->uring_lock) @@ -1998,29 +2005,29 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; - return -EINVAL; + return io_init_fail_req(req, -EINVAL); } def = &io_op_defs[opcode]; if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { /* enforce forwards compatibility on users */ if (sqe_flags & ~SQE_VALID_FLAGS) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (sqe_flags & IOSQE_BUFFER_SELECT) { if (!def->buffer_select) - return -EOPNOTSUPP; + return io_init_fail_req(req, -EOPNOTSUPP); req->buf_index = READ_ONCE(sqe->buf_group); } if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) ctx->drain_disabled = true; if (sqe_flags & IOSQE_IO_DRAIN) { if (ctx->drain_disabled) - return -EOPNOTSUPP; + return io_init_fail_req(req, -EOPNOTSUPP); io_init_req_drain(req); } } if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) - return -EACCES; + return io_init_fail_req(req, -EACCES); /* knock it to the slow queue path, will be drained there */ if (ctx->drain_active) req->flags |= REQ_F_FORCE_ASYNC; @@ -2033,9 +2040,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } if (!def->ioprio && sqe->ioprio) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (def->needs_file) { struct io_submit_state *state = &ctx->submit_state; @@ -2059,12 +2066,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->creds = xa_load(&ctx->personalities, personality); if (!req->creds) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); get_cred(req->creds); ret = security_uring_override_creds(req->creds); if (ret) { put_cred(req->creds); - return ret; + return io_init_fail_req(req, ret); } req->flags |= REQ_F_CREDS; } From 30044c66bc1a8521bc96944a42bf5ff4e517b246 Mon Sep 17 00:00:00 2001 From: "Jiawei Fu (iBug)" Date: Sat, 16 Mar 2024 03:27:49 +0800 Subject: [PATCH 205/553] drivers/nvme: Add quirks for device 126f:2262 [ Upstream commit e89086c43f0500bc7c4ce225495b73b8ce234c1f ] This commit adds NVME_QUIRK_NO_DEEPEST_PS and NVME_QUIRK_BOGUS_NID for device [126f:2262], which appears to be a generic VID:PID pair used for many SSDs based on the Silicon Motion SM2262/SM2262EN controller. Two of my SSDs with this VID:PID pair exhibit the same behavior: * They frequently have trouble exiting the deepest power state (5), resulting in the entire disk unresponsive. Verified by setting nvme_core.default_ps_max_latency_us=10000 and observing them behaving normally. * They produce all-zero nguid and eui64 with `nvme id-ns` command. The offending products are: * HP SSD EX950 1TB * HIKVISION C2000Pro 2TB Signed-off-by: Jiawei Fu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3d01290994d8..5ff09f2cacab 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3471,6 +3471,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_BOGUS_NID, }, From 664206ff8b019bcd1e55b10b2eea3add8761b971 Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Tue, 19 Mar 2024 11:13:44 +0300 Subject: [PATCH 206/553] fbmon: prevent division by zero in fb_videomode_from_videomode() [ Upstream commit c2d953276b8b27459baed1277a4fdd5dd9bd4126 ] The expression htotal * vtotal can have a zero value on overflow. It is necessary to prevent division by zero like in fb_var_to_videomode(). Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbmon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index b0e690f41025..9ca99da3a56a 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -1311,7 +1311,7 @@ int fb_get_mode(int flags, u32 val, struct fb_var_screeninfo *var, struct fb_inf int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode) { - unsigned int htotal, vtotal; + unsigned int htotal, vtotal, total; fbmode->xres = vm->hactive; fbmode->left_margin = vm->hback_porch; @@ -1344,8 +1344,9 @@ int fb_videomode_from_videomode(const struct videomode *vm, vtotal = vm->vactive + vm->vfront_porch + vm->vback_porch + vm->vsync_len; /* prevent division by zero */ - if (htotal && vtotal) { - fbmode->refresh = vm->pixelclock / (htotal * vtotal); + total = htotal * vtotal; + if (total) { + fbmode->refresh = vm->pixelclock / total; /* a mode must have htotal and vtotal != 0 or it is invalid */ } else { fbmode->refresh = 0; From fcd1993a2937604d43ce134b9b9022cb76136610 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 8 Apr 2024 23:18:32 +0200 Subject: [PATCH 207/553] netfilter: nf_tables: release batch on table validation from abort path commit a45e6889575c2067d3c0212b6bc1022891e65b91 upstream. Unlike early commit path stage which triggers a call to abort, an explicit release of the batch is required on abort, otherwise mutex is released and commit_list remains in place. Add WARN_ON_ONCE to ensure commit_list is empty from the abort path before releasing the mutex. After this patch, commit_list is always assumed to be empty before grabbing the mutex, therefore 03c1f1ef1584 ("netfilter: Cleanup nft_net->module_list from nf_tables_exit_net()") only needs to release the pending modules for registration. Cc: stable@vger.kernel.org Fixes: c0391b6ab810 ("netfilter: nf_tables: missing validation from the abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8d38cd504769..6b032a90e2b1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9902,10 +9902,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -10081,7 +10082,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) else nf_tables_module_autoload_cleanup(net); - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -10095,6 +10096,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -10892,9 +10895,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); From 8d3a58af50e46167b6f1db47adadad03c0045dae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 8 Apr 2024 23:18:33 +0200 Subject: [PATCH 208/553] netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path commit 0d459e2ffb541841714839e8228b845458ed3b27 upstream. The commit mutex should not be released during the critical section between nft_gc_seq_begin() and nft_gc_seq_end(), otherwise, async GC worker could collect expired objects and get the released commit lock within the same GC sequence. nf_tables_module_autoload() temporarily releases the mutex to load module dependencies, then it goes back to replay the transaction again. Move it at the end of the abort phase after nft_gc_seq_end() is called. Cc: stable@vger.kernel.org Fixes: 720344340fb9 ("netfilter: nf_tables: GC transaction race with abort path") Reported-by: Kuan-Ting Chen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6b032a90e2b1..e7b31c2c92df 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10077,11 +10077,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - return err; } @@ -10098,6 +10093,14 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. + */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; From 2aeb805a1bcd5f27c8c0d1a9d4d653f16d1506f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 8 Apr 2024 23:18:34 +0200 Subject: [PATCH 209/553] netfilter: nf_tables: discard table flag update with pending basechain deletion commit 1bc83a019bbe268be3526406245ec28c2458a518 upstream. Hook unregistration is deferred to the commit phase, same occurs with hook updates triggered by the table dormant flag. When both commands are combined, this results in deleting a basechain while leaving its hook still registered in the core. Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e7b31c2c92df..8152a69d8268 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1192,6 +1192,24 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ __NFT_TABLE_F_WAS_AWAKEN) +static bool nft_table_pending_update(const struct nft_ctx *ctx) +{ + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + struct nft_trans *trans; + + if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + return true; + + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->ctx.table == ctx->table && + trans->msg_type == NFT_MSG_DELCHAIN && + nft_is_base_chain(trans->ctx.chain)) + return true; + } + + return false; +} + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -1215,7 +1233,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return -EOPNOTSUPP; /* No dormant off/on/off/on games in single transaction */ - if (ctx->table->flags & __NFT_TABLE_F_UPDATE) + if (nft_table_pending_update(ctx)) return -EINVAL; trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, From 2b85977977cbd120591b23c2450e90a5806a7167 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 31 Jul 2023 15:59:42 -0300 Subject: [PATCH 210/553] tty: n_gsm: require CAP_NET_ADMIN to attach N_GSM0710 ldisc commit 67c37756898a5a6b2941a13ae7260c89b54e0d88 upstream. Any unprivileged user can attach N_GSM0710 ldisc, but it requires CAP_NET_ADMIN to create a GSM network anyway. Require initial namespace CAP_NET_ADMIN to do that. Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20230731185942.279611-1-cascardo@canonical.com Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 6b6abce6b69f..d2daf0a72e34 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2969,6 +2969,9 @@ static int gsmld_open(struct tty_struct *tty) { struct gsm_mux *gsm; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (tty->ops->write == NULL) return -EINVAL; From cf0650adb62574b1d5ada471c34aff086e554db3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 28 Mar 2024 07:42:57 +0100 Subject: [PATCH 211/553] gcc-plugins/stackleak: Avoid .head.text section commit e7d24c0aa8e678f41457d1304e2091cac6fd1a2e upstream. The .head.text section carries the startup code that runs with the MMU off or with a translation of memory that deviates from the ordinary one. So avoid instrumentation with the stackleak plugin, which already avoids .init.text and .noinstr.text entirely. Fixes: 48204aba801f1b51 ("x86/sme: Move early SME kernel encryption handling into .head.text") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202403221630.2692c998-oliver.sang@intel.com Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240328064256.2358634-2-ardb+git@google.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/stackleak_plugin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index c5c2ce113c92..d20c47d21ad8 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -467,6 +467,8 @@ static bool stackleak_gate(void) return false; if (STRING_EQUAL(section, ".entry.text")) return false; + if (STRING_EQUAL(section, ".head.text")) + return false; } return track_frame_size >= 0; From f1465ff4c83c0544fd2c6333523301f3484184a7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Apr 2024 09:24:48 +0200 Subject: [PATCH 212/553] Revert "scsi: sd: usb_storage: uas: Access media prior to querying device properties" This reverts commit b73dd5f9997279715cd450ee8ca599aaff2eabb9 which is commit 321da3dc1f3c92a12e3c5da934090d2992a8814c upstream. It is known to cause problems and has asked to be dropped. Link: https://lore.kernel.org/r/yq1frvvpymp.fsf@ca-mkp.ca.oracle.com Cc: Tasos Sahanidis Cc: Ewan D. Milne Cc: Bart Van Assche Cc: Tasos Sahanidis Cc: Martin K. Petersen Cc: James Bottomley Cc: Sasha Levin Reported-by: John David Anglin Reported-by: Cyril Brulebois Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 26 +------------------------- drivers/usb/storage/scsiglue.c | 7 ------- drivers/usb/storage/uas.c | 7 ------- include/scsi/scsi_device.h | 1 - 4 files changed, 1 insertion(+), 40 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f32236c3f81c..ad619f7c7418 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3286,24 +3286,6 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } -static void sd_read_block_zero(struct scsi_disk *sdkp) -{ - unsigned int buf_len = sdkp->device->sector_size; - char *buffer, cmd[10] = { }; - - buffer = kmalloc(buf_len, GFP_KERNEL); - if (!buffer) - return; - - cmd[0] = READ_10; - put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ - put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ - - scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, - SD_TIMEOUT, sdkp->max_retries, NULL); - kfree(buffer); -} - /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3343,13 +3325,7 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - /* - * Some USB/UAS devices return generic values for mode pages - * until the media has been accessed. Trigger a READ operation - * to force the device to populate mode pages. - */ - if (sdp->read_before_ms) - sd_read_block_zero(sdkp); + /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 12cf9940e5b6..c54e9805da53 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,13 +179,6 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; - /* - * Some devices report generic values until the media has been - * accessed. Force a READ(10) prior to querying device - * characteristics. - */ - sdev->read_before_ms = 1; - /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index af619efe8eab..ee5621bdb11e 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -876,13 +876,6 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; - /* - * Some devices report generic values until the media has been - * accessed. Force a READ(10) prior to querying device - * characteristics. - */ - sdev->read_before_ms = 1; - /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index a64713fe5264..b407807cc669 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,7 +204,6 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ - unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ From 345b6b831980964b607db53cfd681abd2234a1b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Apr 2024 09:26:49 +0200 Subject: [PATCH 213/553] Revert "scsi: core: Add struct for args to execution functions" This reverts commit cf33e6ca12d814e1be2263cb76960d0019d7fb94 which is commit d0949565811f0896c1c7e781ab2ad99d34273fdf upstream. It is known to cause problems and has asked to be dropped. Link: https://lore.kernel.org/r/yq1frvvpymp.fsf@ca-mkp.ca.oracle.com Cc: Tasos Sahanidis Cc: Ewan D. Milne Cc: Bart Van Assche Cc: Tasos Sahanidis Cc: Martin K. Petersen Cc: James Bottomley Cc: Sasha Levin Reported-by: John David Anglin Reported-by: Cyril Brulebois Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 52 ++++++++++++++++++++------------------ include/scsi/scsi_device.h | 51 ++++++++++--------------------------- 2 files changed, 41 insertions(+), 62 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index edd296f950a3..5c5954b78585 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,37 +185,39 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } + /** - * scsi_execute_cmd - insert request and wait for the result - * @sdev: scsi_device + * __scsi_execute - insert request and wait for the result + * @sdev: scsi device * @cmd: scsi command - * @opf: block layer request cmd_flags + * @data_direction: data direction * @buffer: data buffer * @bufflen: len of buffer + * @sense: optional sense buffer + * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @args: Optional args. See struct definition for field descriptions + * @flags: flags for ->cmd_flags + * @rq_flags: flags for ->rq_flags + * @resid: optional residual length * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, - blk_opf_t opf, void *buffer, unsigned int bufflen, - int timeout, int retries, - const struct scsi_exec_args *args) +int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + int data_direction, void *buffer, unsigned bufflen, + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, blk_opf_t flags, + req_flags_t rq_flags, int *resid) { - static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - if (!args) - args = &default_args; - else if (WARN_ON_ONCE(args->sense && - args->sense_len != SCSI_SENSE_BUFFERSIZE)) - return -EINVAL; - - req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); + req = scsi_alloc_request(sdev->request_queue, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, + rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -230,7 +232,8 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = retries; req->timeout = timeout; - req->rq_flags |= RQF_QUIET; + req->cmd_flags |= flags; + req->rq_flags |= rq_flags | RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -246,21 +249,20 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (args->resid) - *args->resid = scmd->resid_len; - if (args->sense) - memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (args->sshdr) + if (resid) + *resid = scmd->resid_len; + if (sense && scmd->sense_len) + memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - args->sshdr); - + sshdr); ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(scsi_execute_cmd); +EXPORT_SYMBOL(__scsi_execute); /* * Wake up the error handler if necessary. Avoid as follows that the error diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b407807cc669..d2751ed536df 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -479,51 +479,28 @@ extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); - -/* Optional arguments to scsi_execute_cmd */ -struct scsi_exec_args { - unsigned char *sense; /* sense buffer */ - unsigned int sense_len; /* sense buffer len */ - struct scsi_sense_hdr *sshdr; /* decoded sense header */ - blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ - int *resid; /* residual length */ -}; - -int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, - blk_opf_t opf, void *buffer, unsigned int bufflen, - int timeout, int retries, - const struct scsi_exec_args *args); - +extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + int data_direction, void *buffer, unsigned bufflen, + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, blk_opf_t flags, + req_flags_t rq_flags, int *resid); /* Make sure any sense buffer is the correct size. */ -#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ - _sshdr, _timeout, _retries, _flags, _rq_flags, \ - _resid) \ +#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ + sshdr, timeout, retries, flags, rq_flags, resid) \ ({ \ - scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ - REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ - _buffer, _bufflen, _timeout, _retries, \ - &(struct scsi_exec_args) { \ - .sense = _sense, \ - .sshdr = _sshdr, \ - .req_flags = _rq_flags & RQF_PM ? \ - BLK_MQ_REQ_PM : 0, \ - .resid = _resid, \ - }); \ + BUILD_BUG_ON((sense) != NULL && \ + sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ + __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ + sense, sshdr, timeout, retries, flags, rq_flags, \ + resid); \ }) - static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute_cmd(sdev, cmd, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, - bufflen, timeout, retries, - &(struct scsi_exec_args) { - .sshdr = sshdr, - .resid = resid, - }); + return scsi_execute(sdev, cmd, data_direction, buffer, + bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); From 3a9c459091e33f7947c6c7958963e0bd14e737e4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH 214/553] scsi: sd: usb_storage: uas: Access media prior to querying device properties [ Upstream commit 321da3dc1f3c92a12e3c5da934090d2992a8814c ] It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- drivers/usb/storage/scsiglue.c | 7 +++++++ drivers/usb/storage/uas.c | 7 +++++++ include/scsi/scsi_device.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ad619f7c7418..3ec9b324fdcf 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3286,6 +3286,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_req(sdkp->device, cmd, DMA_FROM_DEVICE, buffer, buf_len, + NULL, SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3325,7 +3343,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da53..12cf9940e5b6 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index ee5621bdb11e..af619efe8eab 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -876,6 +876,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d2751ed536df..1504d3137cc6 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ From 649e5646362a2815ec9cc85120fc59a885282085 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 13 Feb 2024 14:54:25 +0100 Subject: [PATCH 215/553] virtio: reenable config if freezing device failed commit 310227f42882c52356b523e2f4e11690eebcd2ab upstream. Currently, we don't reenable the config if freezing the device failed. For example, virtio-mem currently doesn't support suspend+resume, and trying to freeze the device will always fail. Afterwards, the device will no longer respond to resize requests, because it won't get notified about config changes. Let's fix this by re-enabling the config if freezing fails. Fixes: 22b7050a024d ("virtio: defer config changed notifications") Cc: Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Xuan Zhuo Signed-off-by: David Hildenbrand Message-Id: <20240213135425.795001-1-david@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 828ced060742..1ef094427f29 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -489,13 +489,19 @@ EXPORT_SYMBOL_GPL(unregister_virtio_device); int virtio_device_freeze(struct virtio_device *dev) { struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); + int ret; virtio_config_disable(dev); dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED; - if (drv && drv->freeze) - return drv->freeze(dev); + if (drv && drv->freeze) { + ret = drv->freeze(dev); + if (ret) { + virtio_config_enable(dev); + return ret; + } + } return 0; } From e80b4980af2688d8ff69c157ffa773dd1f1eb02c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 9 Mar 2024 12:24:48 -0800 Subject: [PATCH 216/553] randomize_kstack: Improve entropy diffusion [ Upstream commit 9c573cd313433f6c1f7236fe64b9b743500c1628 ] The kstack_offset variable was really only ever using the low bits for kernel stack offset entropy. Add a ror32() to increase bit diffusion. Suggested-by: Arnd Bergmann Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Link: https://lore.kernel.org/r/20240309202445.work.165-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/linux/randomize_kstack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 5d868505a94e..6d92b68efbf6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) From 00e34ff1ce811749c2dbf3ff5ad2322bce54ebcb Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 29 Mar 2024 07:32:06 -0700 Subject: [PATCH 217/553] platform/x86: intel-vbtn: Update tablet mode switch at end of probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 434e5781d8cd2d0ed512d920c6cdeba4b33a2e81 ] ACER Vivobook Flip (TP401NAS) virtual intel switch is implemented as follow: Device (VGBI) { Name (_HID, EisaId ("INT33D6") ... Name (VBDS, Zero) Method (_STA, 0, Serialized) // _STA: Status ... Method (VBDL, 0, Serialized) { PB1E |= 0x20 VBDS |= 0x40 } Method (VGBS, 0, Serialized) { Return (VBDS) /* \_SB_.PCI0.SBRG.EC0_.VGBI.VBDS */ } ... } By default VBDS is set to 0. At boot it is set to clamshell (bit 6 set) only after method VBDL is executed. Since VBDL is now evaluated in the probe routine later, after the device is registered, the retrieved value of VBDS was still 0 ("tablet mode") when setting up the virtual switch. Make sure to evaluate VGBS after VBDL, to ensure the convertible boots in clamshell mode, the expected default. Fixes: 26173179fae1 ("platform/x86: intel-vbtn: Eval VBDL after registering our notifier") Signed-off-by: Gwendal Grignou Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240329143206.2977734-3-gwendal@chromium.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/vbtn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index c10c99a31a90..224139006a43 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device) priv->switches_dev->id.bustype = BUS_HOST; if (priv->has_switches) { - detect_tablet_mode(&device->dev); - ret = input_register_device(priv->switches_dev); if (ret) return ret; @@ -316,6 +314,9 @@ static int intel_vbtn_probe(struct platform_device *device) if (ACPI_FAILURE(status)) dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status); } + // Check switches after buttons since VBDL may have side effects. + if (has_switches) + detect_tablet_mode(&device->dev); device_init_wakeup(&device->dev, true); /* From 21f5cfed1e7a3254de46fee5f0ceeca6bda14b21 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 23 Feb 2024 12:36:23 -0500 Subject: [PATCH 218/553] Bluetooth: btintel: Fixe build regression commit 6e62ebfb49eb65bdcbfc5797db55e0ce7f79c3dd upstream. This fixes the following build regression: drivers-bluetooth-btintel.c-btintel_read_version()-warn: passing-zero-to-PTR_ERR Fixes: b79e04091010 ("Bluetooth: btintel: Fix null ptr deref in btintel_read_version") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btintel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index c77c06b84d86..7a9d2da3c814 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -405,13 +405,13 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver) struct sk_buff *skb; skb = __hci_cmd_sync(hdev, 0xfc05, 0, NULL, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { + if (IS_ERR(skb)) { bt_dev_err(hdev, "Reading Intel version information failed (%ld)", PTR_ERR(skb)); return PTR_ERR(skb); } - if (skb->len != sizeof(*ver)) { + if (!skb || skb->len != sizeof(*ver)) { bt_dev_err(hdev, "Intel version event size mismatch"); kfree_skb(skb); return -EILSEQ; From cea93dae3e253f03b39403b2e0fd15626feea4ce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Feb 2024 15:03:10 +0100 Subject: [PATCH 219/553] net: mpls: error out if inner headers are not set commit 025f8ad20f2e3264d11683aa9cbbf0083eefbdcd upstream. mpls_gso_segment() assumes skb_inner_network_header() returns a valid result: mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) goto out; if (unlikely(!pskb_may_pull(skb, mpls_hlen))) With syzbot reproducer, skb_inner_network_header() yields 0, skb_network_header() returns 108, so this will "pskb_may_pull(skb, -108)))" which triggers a newly added DEBUG_NET_WARN_ON_ONCE() check: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 pskb_may_pull_reason include/linux/skbuff.h:2723 [inline] WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 pskb_may_pull include/linux/skbuff.h:2739 [inline] WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 mpls_gso_segment+0x773/0xaa0 net/mpls/mpls_gso.c:34 [..] skb_mac_gso_segment+0x383/0x740 net/core/gso.c:53 nsh_gso_segment+0x40a/0xad0 net/nsh/nsh.c:108 skb_mac_gso_segment+0x383/0x740 net/core/gso.c:53 __skb_gso_segment+0x324/0x4c0 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] [..] sch_direct_xmit+0x11a/0x5f0 net/sched/sch_generic.c:327 [..] packet_sendmsg+0x46a9/0x6130 net/packet/af_packet.c:3113 [..] First iteration of this patch made mpls_hlen signed and changed test to error out to "mpls_hlen <= 0 || ..". Eric Dumazet said: > I was thinking about adding a debug check in skb_inner_network_header() > if inner_network_header is zero (that would mean it is not 'set' yet), > but this would trigger even after your patch. So add new skb_inner_network_header_was_set() helper and use that. The syzbot reproducer injects data via packet socket. The skb that gets allocated and passed down the stack has ->protocol set to NSH (0x894f) and gso_type set to SKB_GSO_UDP | SKB_GSO_DODGY. This gets passed to skb_mac_gso_segment(), which sees NSH as ptype to find a callback for. nsh_gso_segment() retrieves next type: proto = tun_p_to_eth_p(nsh_hdr(skb)->np); ... which is MPLS (TUN_P_MPLS_UC). It updates skb->protocol and then calls mpls_gso_segment(). Inner offsets are all 0, so mpls_gso_segment() ends up with a negative header size. In case more callers rely on silent handling of such large may_pull values we could also 'legalize' this behaviour, either replacing the debug check with (len > INT_MAX) test or removing it and instead adding a comment before existing if (unlikely(len > skb->len)) return SKB_DROP_REASON_PKT_TOO_SMALL; test in pskb_may_pull_reason(), saying that this check also implicitly takes care of callers that miscompute header sizes. Cc: Simon Horman Fixes: 219eee9c0d16 ("net: skbuff: add overflow debug check to pull/push helpers") Reported-by: syzbot+99d15fcdb0132a1e1a82@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/00000000000043b1310611e388aa@google.com/raw Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20240222140321.14080-1-fw@strlen.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 5 +++++ net/mpls/mpls_gso.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1326a935b6fa..d5f888fe0e33 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2802,6 +2802,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1482259de9b5..40334d4d89de 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -26,6 +26,9 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __be16 mpls_protocol; unsigned int mpls_hlen; + if (!skb_inner_network_header_was_set(skb)) + goto out; + skb_reset_network_header(skb); mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) From eea65ed73898e6e94ba44b21836f911d64ec59cb Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Mon, 19 Feb 2024 13:53:15 +0300 Subject: [PATCH 220/553] VMCI: Fix possible memcpy() run-time warning in vmci_datagram_invoke_guest_handler() commit e606e4b71798cc1df20e987dde2468e9527bd376 upstream. The changes are similar to those given in the commit 19b070fefd0d ("VMCI: Fix memcpy() run-time warning in dg_dispatch_as_host()"). Fix filling of the msg and msg_payload in dg_info struct, which prevents a possible "detected field-spanning write" of memcpy warning that is issued by the tracking mechanism __fortify_memcpy_chk. Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240219105315.76955-1-kovalev@altlinux.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_datagram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index d1d8224c8800..a0ad1f3a69f7 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -378,7 +378,8 @@ int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) dg_info->in_dg_host_queue = false; dg_info->entry = dst_entry; - memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); + dg_info->msg = *dg; + memcpy(&dg_info->msg_payload, dg + 1, dg->payload_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); From c5f9fe2c1e5023fa096189a8bfba6420aa035587 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Tue, 19 Mar 2024 15:24:03 +0800 Subject: [PATCH 221/553] Revert "drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03c6284df179de3a4a6e0684764b1c71d2a405e2 upstream. This patch causes the following iounmap erorr and calltrace iounmap: bad address 00000000d0b3631f The original patch was unjustified because amdgpu_device_fini_sw() will always cleanup the rmmio mapping. This reverts commit eb4f139888f636614dab3bcce97ff61cefc4b3a7. Signed-off-by: Ma Jun Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e4eb906806a5..b11690a816e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3713,10 +3713,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) { - r = -ENOMEM; - goto unmap_memory; - } + if (!adev->reset_domain) + return -ENOMEM; /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -3724,18 +3722,18 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - goto unmap_memory; + return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - goto unmap_memory; + return r; /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) - goto unmap_memory; + return r; /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -3745,7 +3743,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) - goto unmap_memory; + return r; } /* enable PCIE atomic ops */ @@ -4001,8 +3999,6 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); -unmap_memory: - iounmap(adev->rmmio); return r; } From cd5d98c0556cd790f78a3ba26afc9d2f896163e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Apr 2024 13:05:29 +0200 Subject: [PATCH 222/553] Linux 6.1.86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240411095412.671665933@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: kernelci.org bot Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Mateusz Jończyk Tested-by: Kelsey Steele Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5dff9ff99998..baddd8ed8186 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 85 +SUBLEVEL = 86 EXTRAVERSION = NAME = Curry Ramen From ca5962bdc53c86d3aa23fa619aa52be194f1e20a Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 6 Apr 2024 23:16:08 -0500 Subject: [PATCH 223/553] smb3: fix Open files on server counter going negative commit 28e0947651ce6a2200b9a7eceb93282e97d7e51a upstream. We were decrementing the count of open files on server twice for the case where we were closing cached directories. Fixes: 8e843bf38f7b ("cifs: return a single-use cfid if we did not get a lease") Cc: stable@vger.kernel.org Acked-by: Bharath SM Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index f4ad343b06c1..2ca1881919c7 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -386,8 +386,8 @@ smb2_close_cached_fid(struct kref *ref) if (cfid->is_open) { rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); - if (rc != -EBUSY && rc != -EAGAIN) - atomic_dec(&cfid->tcon->num_remote_opens); + if (rc) /* should we retry on -EBUSY or -EAGAIN? */ + cifs_dbg(VFS, "close cached dir rc %d\n", rc); } free_cached_dir(cfid); From 0559b2d759be06bb92a0470d88d2f1207db20211 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 12 Apr 2024 08:41:15 +0900 Subject: [PATCH 224/553] ata: libata-scsi: Fix ata_scsi_dev_rescan() error path commit 79336504781e7fee5ddaf046dcc186c8dfdf60b1 upstream. Commit 0c76106cb975 ("scsi: sd: Fix TCG OPAL unlock on system resume") incorrectly handles failures of scsi_resume_device() in ata_scsi_dev_rescan(), leading to a double call to spin_unlock_irqrestore() to unlock a device port. Fix this by redefining the goto labels used in case of errors and only unlock the port scsi_scan_mutex when scsi_resume_device() fails. Bug found with the Smatch static checker warning: drivers/ata/libata-scsi.c:4774 ata_scsi_dev_rescan() error: double unlocked 'ap->lock' (orig line 4757) Reported-by: Dan Carpenter Fixes: 0c76106cb975 ("scsi: sd: Fix TCG OPAL unlock on system resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a09548630fc8..65fde5717928 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4667,7 +4667,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) * bail out. */ if (ap->pflags & ATA_PFLAG_SUSPENDED) - goto unlock; + goto unlock_ap; if (!sdev) continue; @@ -4680,7 +4680,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (do_resume) { ret = scsi_resume_device(sdev); if (ret == -EWOULDBLOCK) - goto unlock; + goto unlock_scan; dev->flags &= ~ATA_DFLAG_RESUMING; } ret = scsi_rescan_device(sdev); @@ -4688,12 +4688,13 @@ void ata_scsi_dev_rescan(struct work_struct *work) spin_lock_irqsave(ap->lock, flags); if (ret) - goto unlock; + goto unlock_ap; } } -unlock: +unlock_ap: spin_unlock_irqrestore(ap->lock, flags); +unlock_scan: mutex_unlock(&ap->scsi_scan_mutex); /* Reschedule with a delay if scsi_rescan_device() returned an error */ From 3fe79b2c83461edbbf86ed8a6f3924820ff89259 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 12 Feb 2024 13:58:33 +0100 Subject: [PATCH 225/553] batman-adv: Avoid infinite loop trying to resize local TT commit b1f532a3b1e6d2e5559c7ace49322922637a28aa upstream. If the MTU of one of an attached interface becomes too small to transmit the local translation table then it must be resized to fit inside all fragments (when enabled) or a single packet. But if the MTU becomes too low to transmit even the header + the VLAN specific part then the resizing of the local TT will never succeed. This can for example happen when the usable space is 110 bytes and 11 VLANs are on top of batman-adv. In this case, at least 116 byte would be needed. There will just be an endless spam of batman_adv: batadv0: Forced to purge local tt entries to fit new maximum fragment MTU (110) in the log but the function will never finish. Problem here is that the timeout will be halved all the time and will then stagnate at 0 and therefore never be able to reduce the table even more. There are other scenarios possible with a similar result. The number of BATADV_TT_CLIENT_NOPURGE entries in the local TT can for example be too high to fit inside a packet. Such a scenario can therefore happen also with only a single VLAN + 7 non-purgable addresses - requiring at least 120 bytes. While this should be handled proactively when: * interface with too low MTU is added * VLAN is added * non-purgeable local mac is added * MTU of an attached interface is reduced * fragmentation setting gets disabled (which most likely requires dropping attached interfaces) not all of these scenarios can be prevented because batman-adv is only consuming events without the the possibility to prevent these actions (non-purgable MAC address added, MTU of an attached interface is reduced). It is therefore necessary to also make sure that the code is able to handle also the situations when there were already incompatible system configuration are present. Cc: stable@vger.kernel.org Fixes: a19d3d85e1b8 ("batman-adv: limit local translation table max size") Reported-by: syzbot+a6a4b5bb3da165594cff@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5d8cee74772f..4fc66cd95dc4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) spin_lock_bh(&bat_priv->tt.commit_lock); - while (true) { + while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; From 2d5f12de4cf589c85a0f33ef66f68d38b8f24970 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 9 Apr 2024 15:13:09 -0400 Subject: [PATCH 226/553] ring-buffer: Only update pages_touched when a new page is touched commit ffe3986fece696cf65e0ef99e74c75f848be8e30 upstream. The "buffer_percent" logic that is used by the ring buffer splice code to only wake up the tasks when there's no data after the buffer is filled to the percentage of the "buffer_percent" file is dependent on three variables that determine the amount of data that is in the ring buffer: 1) pages_read - incremented whenever a new sub-buffer is consumed 2) pages_lost - incremented every time a writer overwrites a sub-buffer 3) pages_touched - incremented when a write goes to a new sub-buffer The percentage is the calculation of: (pages_touched - (pages_lost + pages_read)) / nr_pages Basically, the amount of data is the total number of sub-bufs that have been touched, minus the number of sub-bufs lost and sub-bufs consumed. This is divided by the total count to give the buffer percentage. When the percentage is greater than the value in the "buffer_percent" file, it wakes up splice readers waiting for that amount. It was observed that over time, the amount read from the splice was constantly decreasing the longer the trace was running. That is, if one asked for 60%, it would read over 60% when it first starts tracing, but then it would be woken up at under 60% and would slowly decrease the amount of data read after being woken up, where the amount becomes much less than the buffer percent. This was due to an accounting of the pages_touched incrementation. This value is incremented whenever a writer transfers to a new sub-buffer. But the place where it was incremented was incorrect. If a writer overflowed the current sub-buffer it would go to the next one. If it gets preempted by an interrupt at that time, and the interrupt performs a trace, it too will end up going to the next sub-buffer. But only one should increment the counter. Unfortunately, that was not the case. Change the cmpxchg() that does the real switch of the tail-page into a try_cmpxchg(), and on success, perform the increment of pages_touched. This will only increment the counter once for when the writer moves to a new sub-buffer, and not when there's a race and is incremented for when a writer and its preempting writer both move to the same new sub-buffer. Link: https://lore.kernel.org/linux-trace-kernel/20240409151309.0d0e5056@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d2947de3021a..337162e0c3d5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1543,7 +1543,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); - local_inc(&cpu_buffer->pages_touched); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. @@ -1580,8 +1579,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - /* Again, either we update tail_page or an interrupt does */ - (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); + /* Either we update tail_page or an interrupt does */ + if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) + local_inc(&cpu_buffer->pages_touched); } } From 66fab1e120b39f8f47a94186ddee36006fc02ca8 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 2 Apr 2024 14:32:05 +0300 Subject: [PATCH 227/553] Bluetooth: Fix memory leak in hci_req_sync_complete() commit 45d355a926ab40f3ae7bc0b0a00cb0e3e8a5a810 upstream. In 'hci_req_sync_complete()', always free the previous sync request state before assigning reference to a new one. Reported-by: syzbot+39ec16ff6cc18b1d066d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=39ec16ff6cc18b1d066d Cc: stable@vger.kernel.org Fixes: f60cb30579d3 ("Bluetooth: Convert hci_req_sync family of function to new request API") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 4468647df672..cf69e973b724 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - if (skb) + if (skb) { + kfree_skb(hdev->req_skb); hdev->req_skb = skb_get(skb); + } wake_up_interruptible(&hdev->req_wait_q); } } From bd9b94055c3deb2398ee4490c1dfdf03f53efb8f Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 27 Mar 2024 13:10:37 +0800 Subject: [PATCH 228/553] drm/amd/pm: fixes a random hang in S4 for SMU v13.0.4/11 commit 31729e8c21ecfd671458e02b6511eb68c2225113 upstream. While doing multiple S4 stress tests, GC/RLC/PMFW get into an invalid state resulting into hard hangs. Adding a GFX reset as workaround just before sending the MP1_UNLOAD message avoids this failure. Signed-off-by: Tim Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 6d9760eac16d..21b374d12181 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -222,8 +222,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) struct amdgpu_device *adev = smu->adev; int ret = 0; - if (!en && !adev->in_s0ix) + if (!en && !adev->in_s0ix) { + /* Adds a GFX reset as workaround just before sending the + * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering + * an invalid state. + */ + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_2, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + } return ret; } From bccc8d15509273c49cfb2541003febe1469fa680 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 8 Apr 2024 09:02:23 +0200 Subject: [PATCH 229/553] PM: s2idle: Make sure CPUs will wakeup directly on resume commit 3c89a068bfd0698a5478f4cf39493595ef757d5e upstream. s2idle works like a regular suspend with freezing processes and freezing devices. All CPUs except the control CPU go into idle. Once this is completed the control CPU kicks all other CPUs out of idle, so that they reenter the idle loop and then enter s2idle state. The control CPU then issues an swait() on the suspend state and therefore enters the idle loop as well. Due to being kicked out of idle, the other CPUs leave their NOHZ states, which means the tick is active and the corresponding hrtimer is programmed to the next jiffie. On entering s2idle the CPUs shut down their local clockevent device to prevent wakeups. The last CPU which enters s2idle shuts down its local clockevent and freezes timekeeping. On resume, one of the CPUs receives the wakeup interrupt, unfreezes timekeeping and its local clockevent and starts the resume process. At that point all other CPUs are still in s2idle with their clockevents switched off. They only resume when they are kicked by another CPU or after resuming devices and then receiving a device interrupt. That means there is no guarantee that all CPUs will wakeup directly on resume. As a consequence there is no guarantee that timers which are queued on those CPUs and should expire directly after resume, are handled. Also timer list timers which are remotely queued to one of those CPUs after resume will not result in a reprogramming IPI as the tick is active. Queueing a hrtimer will also not result in a reprogramming IPI because the first hrtimer event is already in the past. The recent introduction of the timer pull model (7ee988770326 ("timers: Implement the hierarchical pull model")) amplifies this problem, if the current migrator is one of the non woken up CPUs. When a non pinned timer list timer is queued and the queuing CPU goes idle, it relies on the still suspended migrator CPU to expire the timer which will happen by chance. The problem exists since commit 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path"). There the cpuidle_pause() call which in turn invoked a wakeup for all idle CPUs was moved to a later point in the resume process. This might not be reached or reached very late because it waits on a timer of a still suspended CPU. Address this by kicking all CPUs out of idle after the control CPU returns from swait() so that they resume their timers and restore consistent system state. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218641 Fixes: 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path") Signed-off-by: Anna-Maria Behnsen Reviewed-by: Thomas Gleixner Tested-by: Mario Limonciello Cc: 5.16+ # 5.16+ Acked-by: Peter Zijlstra (Intel) Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/suspend.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index a718067deece..3aae526cc4aa 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,6 +106,12 @@ static void s2idle_enter(void) swait_event_exclusive(s2idle_wait_head, s2idle_state == S2IDLE_STATE_WAKE); + /* + * Kick all CPUs to ensure that they resume their timers and restore + * consistent system state. + */ + wake_up_all_idle_cpus(); + cpus_read_unlock(); raw_spin_lock_irq(&s2idle_lock); From 49054b3ed24954827c7691db78831a86da0cdb47 Mon Sep 17 00:00:00 2001 From: Nini Song Date: Thu, 25 Jan 2024 21:28:45 +0800 Subject: [PATCH 230/553] media: cec: core: remove length check of Timer Status commit ce5d241c3ad4568c12842168288993234345c0eb upstream. The valid_la is used to check the length requirements, including special cases of Timer Status. If the length is shorter than 5, that means no Duration Available is returned, the message will be forced to be invalid. However, the description of Duration Available in the spec is that this parameter may be returned when these cases, or that it can be optionally return when these cases. The key words in the spec description are flexible choices. Remove the special length check of Timer Status to fit the spec which is not compulsory about that. Signed-off-by: Nini Song Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/core/cec-adap.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 4bc2a705029e..c761ac35e120 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1121,20 +1121,6 @@ void cec_received_msg_ts(struct cec_adapter *adap, if (valid_la && min_len) { /* These messages have special length requirements */ switch (cmd) { - case CEC_MSG_TIMER_STATUS: - if (msg->msg[2] & 0x10) { - switch (msg->msg[2] & 0xf) { - case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE: - case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE: - if (msg->len < 5) - valid_la = false; - break; - } - } else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) { - if (msg->len < 5) - valid_la = false; - } - break; case CEC_MSG_RECORD_ON: switch (msg->msg[2]) { case CEC_OP_RECORD_SRC_OWN: From 24c0c5867ad279926a6eba411133eb962a1d55fb Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 22 Mar 2024 12:47:05 -0400 Subject: [PATCH 231/553] arm64: dts: imx8-ss-conn: fix usdhc wrong lpcg clock order [ Upstream commit c6ddd6e7b166532a0816825442ff60f70aed9647 ] The actual clock show wrong frequency: echo on >/sys/devices/platform/bus\@5b000000/5b010000.mmc/power/control cat /sys/kernel/debug/mmc0/ios clock: 200000000 Hz actual clock: 166000000 Hz ^^^^^^^^^ ..... According to sdhc0_lpcg: clock-controller@5b200000 { compatible = "fsl,imx8qxp-lpcg"; reg = <0x5b200000 0x10000>; #clock-cells = <1>; clocks = <&clk IMX_SC_R_SDHC_0 IMX_SC_PM_CLK_PER>, <&conn_ipg_clk>, <&conn_axi_clk>; clock-indices = , , ; clock-output-names = "sdhc0_lpcg_per_clk", "sdhc0_lpcg_ipg_clk", "sdhc0_lpcg_ahb_clk"; power-domains = <&pd IMX_SC_R_SDHC_0>; } "per_clk" should be IMX_LPCG_CLK_0 instead of IMX_LPCG_CLK_5. After correct clocks order: echo on >/sys/devices/platform/bus\@5b000000/5b010000.mmc/power/control cat /sys/kernel/debug/mmc0/ios clock: 200000000 Hz actual clock: 198000000 Hz ^^^^^^^^ ... Fixes: 16c4ea7501b1 ("arm64: dts: imx8: switch to new lpcg clock binding") Signed-off-by: Frank Li Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 10370d1a6c6d..dbb298b907c1 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -38,8 +38,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>; + <&sdhc0_lpcg IMX_LPCG_CLK_5>, + <&sdhc0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; @@ -49,8 +49,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>; + <&sdhc1_lpcg IMX_LPCG_CLK_5>, + <&sdhc1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; @@ -62,8 +62,8 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>; + <&sdhc2_lpcg IMX_LPCG_CLK_5>, + <&sdhc2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; From 84fb60063509e462e39c0e097c7d6dbb71c95967 Mon Sep 17 00:00:00 2001 From: Alex Constantino Date: Thu, 4 Apr 2024 19:14:48 +0100 Subject: [PATCH 232/553] Revert "drm/qxl: simplify qxl_fence_wait" [ Upstream commit 07ed11afb68d94eadd4ffc082b97c2331307c5ea ] This reverts commit 5a838e5d5825c85556011478abde708251cc0776. Changes from commit 5a838e5d5825 ("drm/qxl: simplify qxl_fence_wait") would result in a '[TTM] Buffer eviction failed' exception whenever it reached a timeout. Due to a dependency to DMA_FENCE_WARN this also restores some code deleted by commit d72277b6c37d ("dma-buf: nuke DMA_FENCE_TRACE macros v2"). Fixes: 5a838e5d5825 ("drm/qxl: simplify qxl_fence_wait") Link: https://lore.kernel.org/regressions/ZTgydqRlK6WX_b29@eldamar.lan/ Reported-by: Timo Lindfors Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1054514 Signed-off-by: Alex Constantino Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240404181448.1643-2-dreaming.about.electric.sheep@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/qxl/qxl_release.c | 50 +++++++++++++++++++++++++++---- include/linux/dma-fence.h | 7 +++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 368d26da0d6a..9febc8b73f09 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct qxl_device *qdev; + struct qxl_release *release; + int count = 0, sc = 0; + bool have_drawable_releases; unsigned long cur, end = jiffies + timeout; qdev = container_of(fence->lock, struct qxl_device, release_lock); + release = container_of(fence, struct qxl_release, base); + have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE; - if (!wait_event_timeout(qdev->release_event, - (dma_fence_is_signaled(fence) || - (qxl_io_notify_oom(qdev), 0)), - timeout)) - return 0; +retry: + sc++; + if (dma_fence_is_signaled(fence)) + goto signaled; + + qxl_io_notify_oom(qdev); + + for (count = 0; count < 11; count++) { + if (!qxl_queue_garbage_collect(qdev, true)) + break; + + if (dma_fence_is_signaled(fence)) + goto signaled; + } + + if (dma_fence_is_signaled(fence)) + goto signaled; + + if (have_drawable_releases || sc < 4) { + if (sc > 2) + /* back off */ + usleep_range(500, 1000); + + if (time_after(jiffies, end)) + return 0; + + if (have_drawable_releases && sc > 300) { + DMA_FENCE_WARN(fence, + "failed to wait on release %llu after spincount %d\n", + fence->context & ~0xf0000000, sc); + goto signaled; + } + goto retry; + } + /* + * yeah, original sync_obj_wait gave up after 3 spins when + * have_drawable_releases is not set. + */ + +signaled: cur = jiffies; if (time_after(cur, end)) return 0; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index b79097b9070b..5d6a5f3097cd 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -659,4 +659,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence) return dma_fence_is_array(fence) || dma_fence_is_chain(fence); } +#define DMA_FENCE_WARN(f, fmt, args...) \ + do { \ + struct dma_fence *__ff = (f); \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ + ##args); \ + } while (0) + #endif /* __LINUX_DMA_FENCE_H */ From b7dc2e6b8798518ee69b1bcf67f0f8374af91dd2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Apr 2024 18:02:25 +0200 Subject: [PATCH 233/553] nouveau: fix function cast warning [ Upstream commit 185fdb4697cc9684a02f2fab0530ecdd0c2f15d4 ] Calling a function through an incompatible pointer type causes breaks kcfi, so clang warns about the assignment: drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c:73:10: error: cast from 'void (*)(const void *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 73 | .fini = (void(*)(void *))kfree, Avoid this with a trivial wrapper. Fixes: c39f472e9f14 ("drm/nouveau: remove symlinks, move core/ to nvkm/ (no code changes)") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240404160234.2923554-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c index 4bf486b57101..cb05f7f48a98 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) return ERR_PTR(-EINVAL); } +static void of_fini(void *p) +{ + kfree(p); +} + const struct nvbios_source nvbios_of = { .name = "OpenFirmware", .init = of_init, - .fini = (void(*)(void *))kfree, + .fini = of_fini, .read = of_read, .size = of_size, .rw = false, From 438b9a71b25abbf27a49c977200dcb708b0a1643 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Tue, 2 Apr 2024 11:55:13 +0800 Subject: [PATCH 234/553] scsi: hisi_sas: Modify the deadline for ata_wait_after_reset() [ Upstream commit 0098c55e0881f0b32591f2110410d5c8b7f9bd5a ] We found that the second parameter of function ata_wait_after_reset() is incorrectly used. We call smp_ata_check_ready_type() to poll the device type until the 30s timeout, so the correct deadline should be (jiffies + 30000). Fixes: 3c2673a09cf1 ("scsi: hisi_sas: Fix SATA devices missing issue during I_T nexus reset") Co-developed-by: xiabing Signed-off-by: xiabing Co-developed-by: Yihang Li Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/20240402035513.2024241-3-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 450a8578157c..2116f5ee36e2 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1715,7 +1715,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) if (dev_is_sata(device)) { struct ata_link *link = &device->sata_dev.ap->link; - rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT, + rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT, smp_ata_check_ready_type); } else { msleep(2000); From 9fc74e367be4247a5ac39bb8ec41eaa73fade510 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Apr 2024 12:56:54 +0300 Subject: [PATCH 235/553] scsi: qla2xxx: Fix off by one in qla_edif_app_getstats() [ Upstream commit 4406e4176f47177f5e51b4cc7e6a7a2ff3dbfbbd ] The app_reply->elem[] array is allocated earlier in this function and it has app_req.num_ports elements. Thus this > comparison needs to be >= to prevent memory corruption. Fixes: 7878f22a2e03 ("scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/5c125b2f-92dd-412b-9b6f-fc3a3207bd60@moroto.mountain Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_edif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 7aee4d093969..969008071dec 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -1058,7 +1058,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (fcport->edif.enable) { - if (pcnt > app_req.num_ports) + if (pcnt >= app_req.num_ports) break; app_reply->elem[pcnt].rekey_count = From 0b445005599d915991361b5f16957f45a0cd0425 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 3 Apr 2024 22:38:01 +0200 Subject: [PATCH 236/553] net: openvswitch: fix unwanted error log on timeout policy probing [ Upstream commit 4539f91f2a801c0c028c252bffae56030cfb2cae ] On startup, ovs-vswitchd probes different datapath features including support for timeout policies. While probing, it tries to execute certain operations with OVS_PACKET_ATTR_PROBE or OVS_FLOW_ATTR_PROBE attributes set. These attributes tell the openvswitch module to not log any errors when they occur as it is expected that some of the probes will fail. For some reason, setting the timeout policy ignores the PROBE attribute and logs a failure anyway. This is causing the following kernel log on each re-start of ovs-vswitchd: kernel: Failed to associated timeout policy `ovs_test_tp' Fix that by using the same logging macro that all other messages are using. The message will still be printed at info level when needed and will be rate limited, but with a net rate limiter instead of generic printk one. The nf_ct_set_timeout() itself will still print some info messages, but at least this change makes logging in openvswitch module more consistent. Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Signed-off-by: Ilya Maximets Acked-by: Eelco Chaudron Link: https://lore.kernel.org/r/20240403203803.2137962-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/conntrack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0591cfb289d5..e4ba86b84b9b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1711,8 +1711,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) - pr_info_ratelimited("Failed to associated timeout " - "policy `%s'\n", ct_info.timeout); + OVS_NLERR(log, + "Failed to associated timeout policy '%s'", + ct_info.timeout); else ct_info.nf_ct_timeout = rcu_dereference( nf_ct_timeout_find(ct_info.ct)->timeout); From ac1c10b4ebdf7b1f4211524030e089d8f9c64d52 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 4 Apr 2024 09:57:40 +0200 Subject: [PATCH 237/553] u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file [ Upstream commit 38a15d0a50e0a43778561a5861403851f0b0194c ] Fix bogus lockdep warnings if multiple u64_stats_sync variables are initialized in the same file. With CONFIG_LOCKDEP, seqcount_init() is a macro which declares: static struct lock_class_key __key; Since u64_stats_init() is a function (albeit an inline one), all calls within the same file end up using the same instance, effectively treating them all as a single lock-class. Fixes: 9464ca650008 ("net: make u64_stats_init() a function") Closes: https://lore.kernel.org/netdev/ea1567d9-ce66-45e6-8168-ac40a47d1821@roeck-us.net/ Signed-off-by: Petr Tesarik Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240404075740.30682-1-petr@tesarici.cz Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/u64_stats_sync.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 46040d66334a..79c3bbaa7e13 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p) p->v++; } -static inline void u64_stats_init(struct u64_stats_sync *syncp) -{ - seqcount_init(&syncp->seq); -} +#define u64_stats_init(syncp) \ + do { \ + struct u64_stats_sync *__s = (syncp); \ + seqcount_init(&__s->seq); \ + } while (0) static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { From 2a523f14a3f53b46ff0e1fafd215b0bc5f6783aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Apr 2024 20:27:38 +0000 Subject: [PATCH 238/553] xsk: validate user input for XDP_{UMEM|COMPLETION}_FILL_RING MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 237f3cf13b20db183d3706d997eedc3c49eacd44 ] syzbot reported an illegal copy in xsk_setsockopt() [1] Make sure to validate setsockopt() @optlen parameter. [1] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] BUG: KASAN: slab-out-of-bounds in xsk_setsockopt+0x909/0xa40 net/xdp/xsk.c:1420 Read of size 4 at addr ffff888028c6cde3 by task syz-executor.0/7549 CPU: 0 PID: 7549 Comm: syz-executor.0 Not tainted 6.8.0-syzkaller-08951-gfe46a7dd189e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] copy_from_sockptr include/linux/sockptr.h:55 [inline] xsk_setsockopt+0x909/0xa40 net/xdp/xsk.c:1420 do_sock_setsockopt+0x3af/0x720 net/socket.c:2311 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7fb40587de69 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb40665a0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007fb4059abf80 RCX: 00007fb40587de69 RDX: 0000000000000005 RSI: 000000000000011b RDI: 0000000000000006 RBP: 00007fb4058ca47a R08: 0000000000000002 R09: 0000000000000000 R10: 0000000020001980 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fb4059abf80 R15: 00007fff57ee4d08 Allocated by task 7549: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:370 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:3966 [inline] __kmalloc+0x233/0x4a0 mm/slub.c:3979 kmalloc include/linux/slab.h:632 [inline] __cgroup_bpf_run_filter_setsockopt+0xd2f/0x1040 kernel/bpf/cgroup.c:1869 do_sock_setsockopt+0x6b4/0x720 net/socket.c:2293 __sys_setsockopt+0x1ae/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 The buggy address belongs to the object at ffff888028c6cde0 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 1 bytes to the right of allocated 2-byte region [ffff888028c6cde0, ffff888028c6cde2) The buggy address belongs to the physical page: page:ffffea0000a31b00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888028c6c9c0 pfn:0x28c6c anon flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000800 ffff888014c41280 0000000000000000 dead000000000001 raw: ffff888028c6c9c0 0000000080800057 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112cc0(GFP_USER|__GFP_NOWARN|__GFP_NORETRY), pid 6648, tgid 6644 (syz-executor.0), ts 133906047828, free_ts 133859922223 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311 __alloc_pages+0x256/0x680 mm/page_alloc.c:4569 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page+0x5f/0x160 mm/slub.c:2175 allocate_slab mm/slub.c:2338 [inline] new_slab+0x84/0x2f0 mm/slub.c:2391 ___slab_alloc+0xc73/0x1260 mm/slub.c:3525 __slab_alloc mm/slub.c:3610 [inline] __slab_alloc_node mm/slub.c:3663 [inline] slab_alloc_node mm/slub.c:3835 [inline] __do_kmalloc_node mm/slub.c:3965 [inline] __kmalloc_node+0x2db/0x4e0 mm/slub.c:3973 kmalloc_node include/linux/slab.h:648 [inline] __vmalloc_area_node mm/vmalloc.c:3197 [inline] __vmalloc_node_range+0x5f9/0x14a0 mm/vmalloc.c:3392 __vmalloc_node mm/vmalloc.c:3457 [inline] vzalloc+0x79/0x90 mm/vmalloc.c:3530 bpf_check+0x260/0x19010 kernel/bpf/verifier.c:21162 bpf_prog_load+0x1667/0x20f0 kernel/bpf/syscall.c:2895 __sys_bpf+0x4ee/0x810 kernel/bpf/syscall.c:5631 __do_sys_bpf kernel/bpf/syscall.c:5738 [inline] __se_sys_bpf kernel/bpf/syscall.c:5736 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5736 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 page last free pid 6650 tgid 6647 stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1140 [inline] free_unref_page_prepare+0x95d/0xa80 mm/page_alloc.c:2346 free_unref_page_list+0x5a3/0x850 mm/page_alloc.c:2532 release_pages+0x2117/0x2400 mm/swap.c:1042 tlb_batch_pages_flush mm/mmu_gather.c:98 [inline] tlb_flush_mmu_free mm/mmu_gather.c:293 [inline] tlb_flush_mmu+0x34d/0x4e0 mm/mmu_gather.c:300 tlb_finish_mmu+0xd4/0x200 mm/mmu_gather.c:392 exit_mmap+0x4b6/0xd40 mm/mmap.c:3300 __mmput+0x115/0x3c0 kernel/fork.c:1345 exit_mm+0x220/0x310 kernel/exit.c:569 do_exit+0x99e/0x27e0 kernel/exit.c:865 do_group_exit+0x207/0x2c0 kernel/exit.c:1027 get_signal+0x176e/0x1850 kernel/signal.c:2907 arch_do_signal_or_restart+0x96/0x860 arch/x86/kernel/signal.c:310 exit_to_user_mode_loop kernel/entry/common.c:105 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:201 [inline] syscall_exit_to_user_mode+0xc9/0x360 kernel/entry/common.c:212 do_syscall_64+0x10a/0x240 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Memory state around the buggy address: ffff888028c6cc80: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc ffff888028c6cd00: fa fc fc fc fa fc fc fc 00 fc fc fc 06 fc fc fc >ffff888028c6cd80: fa fc fc fc fa fc fc fc fa fc fc fc 02 fc fc fc ^ ffff888028c6ce00: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc ffff888028c6ce80: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc Fixes: 423f38329d26 ("xsk: add umem fill queue support and mmap") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: "Björn Töpel" Cc: Magnus Karlsson Cc: Maciej Fijalkowski Cc: Jonathan Lemon Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20240404202738.3634547-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/xdp/xsk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5c8e02d56fd4..e3bdfc517424 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1127,6 +1127,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_queue **q; int entries; + if (optlen < sizeof(entries)) + return -EINVAL; if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; From 4a1b65d1e55d53b397cb27014208be1e04172670 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Apr 2024 10:30:34 +0000 Subject: [PATCH 239/553] geneve: fix header validation in geneve[6]_xmit_skb [ Upstream commit d8a6213d70accb403b82924a1c229e733433a5ef ] syzbot is able to trigger an uninit-value in geneve_xmit() [1] Problem : While most ip tunnel helpers (like ip_tunnel_get_dsfield()) uses skb_protocol(skb, true), pskb_inet_may_pull() is only using skb->protocol. If anything else than ETH_P_IPV6 or ETH_P_IP is found in skb->protocol, pskb_inet_may_pull() does nothing at all. If a vlan tag was provided by the caller (af_packet in the syzbot case), the network header might not point to the correct location, and skb linear part could be smaller than expected. Add skb_vlan_inet_prepare() to perform a complete mac validation. Use this in geneve for the moment, I suspect we need to adopt this more broadly. v4 - Jakub reported v3 broke l2_tos_ttl_inherit.sh selftest - Only call __vlan_get_protocol() for vlan types. Link: https://lore.kernel.org/netdev/20240404100035.3270a7d5@kernel.org/ v2,v3 - Addressed Sabrina comments on v1 and v2 Link: https://lore.kernel.org/netdev/Zg1l9L2BNoZWZDZG@hog/ [1] BUG: KMSAN: uninit-value in geneve_xmit_skb drivers/net/geneve.c:910 [inline] BUG: KMSAN: uninit-value in geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030 geneve_xmit_skb drivers/net/geneve.c:910 [inline] geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030 __netdev_start_xmit include/linux/netdevice.h:4903 [inline] netdev_start_xmit include/linux/netdevice.h:4917 [inline] xmit_one net/core/dev.c:3531 [inline] dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3547 __dev_queue_xmit+0x348d/0x52c0 net/core/dev.c:4335 dev_queue_xmit include/linux/netdevice.h:3091 [inline] packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8bb0/0x9ef0 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was created at: slab_post_alloc_hook mm/slub.c:3804 [inline] slab_alloc_node mm/slub.c:3845 [inline] kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577 __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1318 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x722d/0x9ef0 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199 do_syscall_64+0xd5/0x1f0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 CPU: 0 PID: 5033 Comm: syz-executor346 Not tainted 6.9.0-rc1-syzkaller-00005-g928a87efa423 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Fixes: d13f048dd40e ("net: geneve: modify IP header check in geneve6_xmit_skb and geneve_xmit_skb") Reported-by: syzbot+9ee20ec1de7b3168db09@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/000000000000d19c3a06152f9ee4@google.com/ Signed-off-by: Eric Dumazet Cc: Phillip Potter Cc: Sabrina Dubroca Reviewed-by: Sabrina Dubroca Reviewed-by: Phillip Potter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 4 ++-- include/net/ip_tunnels.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3f8da6f0b25c..488ca1c85496 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -930,7 +930,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -1028,7 +1028,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bca80522f95c..f9906b73e7ff 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -351,6 +351,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) return pskb_network_may_pull(skb, nhlen); } +/* Variant of pskb_inet_may_pull(). + */ +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +{ + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. + */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; From ecedcd7e398542c3e594605de9b44ac8981f17c2 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Fri, 5 Apr 2024 16:55:13 -0700 Subject: [PATCH 240/553] bnxt_en: Reset PTP tx_avail after possible firmware reset [ Upstream commit faa12ca245585379d612736a4b5e98e88481ea59 ] It is possible that during error recovery and firmware reset, there is a pending TX PTP packet waiting for the timestamp. We need to reset this condition so that after recovery, the tx_avail count for PTP is reset back to the initial value. Otherwise, we may not accept any PTP TX timestamps after recovery. Fixes: 118612d519d8 ("bnxt_en: Add PTP clock APIs, ioctls, and ethtool methods") Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f810b5dc25f0..0d0aad7141c1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10564,6 +10564,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); + if (bp->ptp_cfg) + atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); return 0; From be033154523f554cf1d3af9e6bbe41b756fab2ff Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 Apr 2024 22:30:39 +0200 Subject: [PATCH 241/553] net: ks8851: Inline ks8851_rx_skb() [ Upstream commit f96f700449b6d190e06272f1cf732ae8e45b73df ] Both ks8851_rx_skb_par() and ks8851_rx_skb_spi() call netif_rx(skb), inline the netif_rx(skb) call directly into ks8851_common.c and drop the .rx_skb callback and ks8851_rx_skb() wrapper. This removes one indirect call from the driver, no functional change otherwise. Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20240405203204.82062-1-marex@denx.de Signed-off-by: Jakub Kicinski Stable-dep-of: be0384bf599c ("net: ks8851: Handle softirqs at the end of IRQ thread to fix hang") Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851.h | 3 --- drivers/net/ethernet/micrel/ks8851_common.c | 12 +----------- drivers/net/ethernet/micrel/ks8851_par.c | 11 ----------- drivers/net/ethernet/micrel/ks8851_spi.c | 11 ----------- 4 files changed, 1 insertion(+), 36 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index e5ec0a363aff..31f75b4a67fd 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -368,7 +368,6 @@ union ks8851_tx_hdr { * @rdfifo: FIFO read callback * @wrfifo: FIFO write callback * @start_xmit: start_xmit() implementation callback - * @rx_skb: rx_skb() implementation callback * @flush_tx_work: flush_tx_work() implementation callback * * The @statelock is used to protect information in the structure which may @@ -423,8 +422,6 @@ struct ks8851_net { struct sk_buff *txp, bool irq); netdev_tx_t (*start_xmit)(struct sk_buff *skb, struct net_device *dev); - void (*rx_skb)(struct ks8851_net *ks, - struct sk_buff *skb); void (*flush_tx_work)(struct ks8851_net *ks); }; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 0bf13b38b8f5..896d43bb8883 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]); } -/** - * ks8851_rx_skb - receive skbuff - * @ks: The device state. - * @skb: The skbuff - */ -static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb) -{ - ks->rx_skb(ks, skb); -} - /** * ks8851_rx_pkts - receive packets from the host * @ks: The device information. @@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - ks8851_rx_skb(ks, skb); + netif_rx(skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 7f49042484bd..96fb0ffcedb9 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp, iowrite16_rep(ksp->hw_addr, txp->data, len / 2); } -/** - * ks8851_rx_skb_par - receive skbuff - * @ks: The device state. - * @skb: The skbuff - */ -static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb) -{ - netif_rx(skb); -} - static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks) { return ks8851_rdreg16_par(ks, KS_TXQCR); @@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev) ks->rdfifo = ks8851_rdfifo_par; ks->wrfifo = ks8851_wrfifo_par; ks->start_xmit = ks8851_start_xmit_par; - ks->rx_skb = ks8851_rx_skb_par; #define STD_IRQ (IRQ_LCI | /* Link Change */ \ IRQ_RXI | /* RX done */ \ diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 88e26c120b48..4dcbff789b19 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len) return ALIGN(len + 4, 4); } -/** - * ks8851_rx_skb_spi - receive skbuff - * @ks: The device state - * @skb: The skbuff - */ -static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) -{ - netif_rx(skb); -} - /** * ks8851_tx_work - process tx packet(s) * @work: The work strucutre what was scheduled. @@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi) ks->rdfifo = ks8851_rdfifo_spi; ks->wrfifo = ks8851_wrfifo_spi; ks->start_xmit = ks8851_start_xmit_spi; - ks->rx_skb = ks8851_rx_skb_spi; ks->flush_tx_work = ks8851_flush_tx_work_spi; #define STD_IRQ (IRQ_LCI | /* Link Change */ \ From 492337a4fbd1421b42df684ee9b34be2a2722540 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 Apr 2024 22:30:40 +0200 Subject: [PATCH 242/553] net: ks8851: Handle softirqs at the end of IRQ thread to fix hang [ Upstream commit be0384bf599cf1eb8d337517feeb732d71f75a6f ] The ks8851_irq() thread may call ks8851_rx_pkts() in case there are any packets in the MAC FIFO, which calls netif_rx(). This netif_rx() implementation is guarded by local_bh_disable() and local_bh_enable(). The local_bh_enable() may call do_softirq() to run softirqs in case any are pending. One of the softirqs is net_rx_action, which ultimately reaches the driver .start_xmit callback. If that happens, the system hangs. The entire call chain is below: ks8851_start_xmit_par from netdev_start_xmit netdev_start_xmit from dev_hard_start_xmit dev_hard_start_xmit from sch_direct_xmit sch_direct_xmit from __dev_queue_xmit __dev_queue_xmit from __neigh_update __neigh_update from neigh_update neigh_update from arp_process.constprop.0 arp_process.constprop.0 from __netif_receive_skb_one_core __netif_receive_skb_one_core from process_backlog process_backlog from __napi_poll.constprop.0 __napi_poll.constprop.0 from net_rx_action net_rx_action from __do_softirq __do_softirq from call_with_stack call_with_stack from do_softirq do_softirq from __local_bh_enable_ip __local_bh_enable_ip from netif_rx netif_rx from ks8851_irq ks8851_irq from irq_thread_fn irq_thread_fn from irq_thread irq_thread from kthread kthread from ret_from_fork The hang happens because ks8851_irq() first locks a spinlock in ks8851_par.c ks8851_lock_par() spin_lock_irqsave(&ksp->lock, ...) and with that spinlock locked, calls netif_rx(). Once the execution reaches ks8851_start_xmit_par(), it calls ks8851_lock_par() again which attempts to claim the already locked spinlock again, and the hang happens. Move the do_softirq() call outside of the spinlock protected section of ks8851_irq() by disabling BHs around the entire spinlock protected section of ks8851_irq() handler. Place local_bh_enable() outside of the spinlock protected section, so that it can trigger do_softirq() without the ks8851_par.c ks8851_lock_par() spinlock being held, and safely call ks8851_start_xmit_par() without attempting to lock the already locked spinlock. Since ks8851_irq() is protected by local_bh_disable()/local_bh_enable() now, replace netif_rx() with __netif_rx() which is not duplicating the local_bh_disable()/local_bh_enable() calls. Fixes: 797047f875b5 ("net: ks8851: Implement Parallel bus operations") Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20240405203204.82062-2-marex@denx.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/micrel/ks8851_common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 896d43bb8883..d4cdf3d4f552 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -299,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - netif_rx(skb); + __netif_rx(skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; @@ -330,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) unsigned long flags; unsigned int status; + local_bh_disable(); + ks8851_lock(ks, &flags); status = ks8851_rdreg16(ks, KS_ISR); @@ -406,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); + local_bh_enable(); + return IRQ_HANDLED; } From 84a352b7eba1142a95441380058985ff19f25ec9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 5 Apr 2024 15:10:57 -0700 Subject: [PATCH 243/553] af_unix: Clear stale u->oob_skb. [ Upstream commit b46f4eaa4f0ec38909fb0072eea3aeddb32f954e ] syzkaller started to report deadlock of unix_gc_lock after commit 4090fa373f0e ("af_unix: Replace garbage collection algorithm."), but it just uncovers the bug that has been there since commit 314001f0bf92 ("af_unix: Add OOB support"). The repro basically does the following. from socket import * from array import array c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) c1.sendmsg([b'a'], [(SOL_SOCKET, SCM_RIGHTS, array("i", [c2.fileno()]))], MSG_OOB) c2.recv(1) # blocked as no normal data in recv queue c2.close() # done async and unblock recv() c1.close() # done async and trigger GC A socket sends its file descriptor to itself as OOB data and tries to receive normal data, but finally recv() fails due to async close(). The problem here is wrong handling of OOB skb in manage_oob(). When recvmsg() is called without MSG_OOB, manage_oob() is called to check if the peeked skb is OOB skb. In such a case, manage_oob() pops it out of the receive queue but does not clear unix_sock(sk)->oob_skb. This is wrong in terms of uAPI. Let's say we send "hello" with MSG_OOB, and "world" without MSG_OOB. The 'o' is handled as OOB data. When recv() is called twice without MSG_OOB, the OOB data should be lost. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM, 0) >>> c1.send(b'hello', MSG_OOB) # 'o' is OOB data 5 >>> c1.send(b'world') 5 >>> c2.recv(5) # OOB data is not received b'hell' >>> c2.recv(5) # OOB date is skipped b'world' >>> c2.recv(5, MSG_OOB) # This should return an error b'o' In the same situation, TCP actually returns -EINVAL for the last recv(). Also, if we do not clear unix_sk(sk)->oob_skb, unix_poll() always set EPOLLPRI even though the data has passed through by previous recv(). To avoid these issues, we must clear unix_sk(sk)->oob_skb when dequeuing it from recv queue. The reason why the old GC did not trigger the deadlock is because the old GC relied on the receive queue to detect the loop. When it is triggered, the socket with OOB data is marked as GC candidate because file refcount == inflight count (1). However, after traversing all inflight sockets, the socket still has a positive inflight count (1), thus the socket is excluded from candidates. Then, the old GC lose the chance to garbage-collect the socket. With the old GC, the repro continues to create true garbage that will never be freed nor detected by kmemleak as it's linked to the global inflight list. That's why we couldn't even notice the issue. Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: syzbot+7f7f201cc2668a8fd169@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7f7f201cc2668a8fd169 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240405221057.2406-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e1af94393789..373530303ad1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2677,7 +2677,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } } else if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) + kfree_skb(skb); skb = skb_peek(&sk->sk_receive_queue); } } From 7e33f68791eb84c4f2457855fecfc592710accc8 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Mon, 8 Apr 2024 12:06:43 +0530 Subject: [PATCH 244/553] octeontx2-af: Fix NIX SQ mode and BP config [ Upstream commit faf23006185e777db18912685922c5ddb2df383f ] NIX SQ mode and link backpressure configuration is required for all platforms. But in current driver this code is wrongly placed under specific platform check. This patch fixes the issue by moving the code out of platform check. Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology") Signed-off-by: Geetha sowjanya Link: https://lore.kernel.org/r/20240408063643.26288-1-gakula@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index bb99302eab67..67080d5053e0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4237,19 +4237,19 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) */ rvu_write64(rvu, blkaddr, NIX_AF_CFG, rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL); - - /* Set chan/link to backpressure TL3 instead of TL2 */ - rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); - - /* Disable SQ manager's sticky mode operation (set TM6 = 0) - * This sticky mode is known to cause SQ stalls when multiple - * SQs are mapped to same SMQ and transmitting pkts at a time. - */ - cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); - cfg &= ~BIT_ULL(15); - rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); } + /* Set chan/link to backpressure TL3 instead of TL2 */ + rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01); + + /* Disable SQ manager's sticky mode operation (set TM6 = 0) + * This sticky mode is known to cause SQ stalls when multiple + * SQs are mapped to same SMQ and transmitting pkts at a time. + */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS); + cfg &= ~BIT_ULL(15); + rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg); + ltdefs = rvu->kpu.lt_def; /* Calibrate X2P bus to check if CGX/LBK links are fine */ err = nix_calibrate_x2p(rvu, blkaddr); From 2c46877f5f935099604c53c9ad3118bae6770108 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2024 09:42:02 +0200 Subject: [PATCH 245/553] ipv6: fib: hide unused 'pn' variable [ Upstream commit 74043489fcb5e5ca4074133582b5b8011b67f9e7 ] When CONFIG_IPV6_SUBTREES is disabled, the only user is hidden, causing a 'make W=1' warning: net/ipv6/ip6_fib.c: In function 'fib6_add': net/ipv6/ip6_fib.c:1388:32: error: variable 'pn' set but not used [-Werror=unused-but-set-variable] Add another #ifdef around the variable declaration, matching the other uses in this file. Fixes: 66729e18df08 ("[IPV6] ROUTE: Make sure we have fn->leaf when adding a node on subtree.") Link: https://lore.kernel.org/netdev/20240322131746.904943-1-arnd@kernel.org/ Reviewed-by: David Ahern Signed-off-by: Arnd Bergmann Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240408074219.3030256-1-arnd@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/ip6_fib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e606374854ce..8213626434b9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1376,7 +1376,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; +#ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; +#endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; @@ -1400,9 +1403,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, goto out; } +#ifdef CONFIG_IPV6_SUBTREES pn = fn; -#ifdef CONFIG_IPV6_SUBTREES if (rt->fib6_src.plen) { struct fib6_node *sn; From 03d564999fa8a5ed01965628c39f70a69be19485 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2024 09:42:03 +0200 Subject: [PATCH 246/553] ipv4/route: avoid unused-but-set-variable warning [ Upstream commit cf1b7201df59fb936f40f4a807433fe3f2ce310a ] The log_martians variable is only used in an #ifdef, causing a 'make W=1' warning with gcc: net/ipv4/route.c: In function 'ip_rt_send_redirect': net/ipv4/route.c:880:13: error: variable 'log_martians' set but not used [-Werror=unused-but-set-variable] Change the #ifdef to an equivalent IS_ENABLED() to let the compiler see where the variable is used. Fixes: 30038fc61adf ("net: ip_rt_send_redirect() optimization") Reviewed-by: David Ahern Signed-off-by: Arnd Bergmann Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240408074219.3030256-2-arnd@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 474f391fab35..a0c687ff2598 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (log_martians && + if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); -#endif } out_put_peer: inet_putpeer(peer); From de76ae9ea1a6cf9e77fcec4f2df2904e26c23ceb Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 8 Apr 2024 16:18:21 +0200 Subject: [PATCH 247/553] ipv6: fix race condition between ipv6_get_ifaddr and ipv6_del_addr [ Upstream commit 7633c4da919ad51164acbf1aa322cc1a3ead6129 ] Although ipv6_get_ifaddr walks inet6_addr_lst under the RCU lock, it still means hlist_for_each_entry_rcu can return an item that got removed from the list. The memory itself of such item is not freed thanks to RCU but nothing guarantees the actual content of the memory is sane. In particular, the reference count can be zero. This can happen if ipv6_del_addr is called in parallel. ipv6_del_addr removes the entry from inet6_addr_lst (hlist_del_init_rcu(&ifp->addr_lst)) and drops all references (__in6_ifa_put(ifp) + in6_ifa_put(ifp)). With bad enough timing, this can happen: 1. In ipv6_get_ifaddr, hlist_for_each_entry_rcu returns an entry. 2. Then, the whole ipv6_del_addr is executed for the given entry. The reference count drops to zero and kfree_rcu is scheduled. 3. ipv6_get_ifaddr continues and tries to increments the reference count (in6_ifa_hold). 4. The rcu is unlocked and the entry is freed. 5. The freed entry is returned. Prevent increasing of the reference count in such case. The name in6_ifa_hold_safe is chosen to mimic the existing fib6_info_hold_safe. [ 41.506330] refcount_t: addition on 0; use-after-free. [ 41.506760] WARNING: CPU: 0 PID: 595 at lib/refcount.c:25 refcount_warn_saturate+0xa5/0x130 [ 41.507413] Modules linked in: veth bridge stp llc [ 41.507821] CPU: 0 PID: 595 Comm: python3 Not tainted 6.9.0-rc2.main-00208-g49563be82afa #14 [ 41.508479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) [ 41.509163] RIP: 0010:refcount_warn_saturate+0xa5/0x130 [ 41.509586] Code: ad ff 90 0f 0b 90 90 c3 cc cc cc cc 80 3d c0 30 ad 01 00 75 a0 c6 05 b7 30 ad 01 01 90 48 c7 c7 38 cc 7a 8c e8 cc 18 ad ff 90 <0f> 0b 90 90 c3 cc cc cc cc 80 3d 98 30 ad 01 00 0f 85 75 ff ff ff [ 41.510956] RSP: 0018:ffffbda3c026baf0 EFLAGS: 00010282 [ 41.511368] RAX: 0000000000000000 RBX: ffff9e9c46914800 RCX: 0000000000000000 [ 41.511910] RDX: ffff9e9c7ec29c00 RSI: ffff9e9c7ec1c900 RDI: ffff9e9c7ec1c900 [ 41.512445] RBP: ffff9e9c43660c9c R08: 0000000000009ffb R09: 00000000ffffdfff [ 41.512998] R10: 00000000ffffdfff R11: ffffffff8ca58a40 R12: ffff9e9c4339a000 [ 41.513534] R13: 0000000000000001 R14: ffff9e9c438a0000 R15: ffffbda3c026bb48 [ 41.514086] FS: 00007fbc4cda1740(0000) GS:ffff9e9c7ec00000(0000) knlGS:0000000000000000 [ 41.514726] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 41.515176] CR2: 000056233b337d88 CR3: 000000000376e006 CR4: 0000000000370ef0 [ 41.515713] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 41.516252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 41.516799] Call Trace: [ 41.517037] [ 41.517249] ? __warn+0x7b/0x120 [ 41.517535] ? refcount_warn_saturate+0xa5/0x130 [ 41.517923] ? report_bug+0x164/0x190 [ 41.518240] ? handle_bug+0x3d/0x70 [ 41.518541] ? exc_invalid_op+0x17/0x70 [ 41.520972] ? asm_exc_invalid_op+0x1a/0x20 [ 41.521325] ? refcount_warn_saturate+0xa5/0x130 [ 41.521708] ipv6_get_ifaddr+0xda/0xe0 [ 41.522035] inet6_rtm_getaddr+0x342/0x3f0 [ 41.522376] ? __pfx_inet6_rtm_getaddr+0x10/0x10 [ 41.522758] rtnetlink_rcv_msg+0x334/0x3d0 [ 41.523102] ? netlink_unicast+0x30f/0x390 [ 41.523445] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 41.523832] netlink_rcv_skb+0x53/0x100 [ 41.524157] netlink_unicast+0x23b/0x390 [ 41.524484] netlink_sendmsg+0x1f2/0x440 [ 41.524826] __sys_sendto+0x1d8/0x1f0 [ 41.525145] __x64_sys_sendto+0x1f/0x30 [ 41.525467] do_syscall_64+0xa5/0x1b0 [ 41.525794] entry_SYSCALL_64_after_hwframe+0x72/0x7a [ 41.526213] RIP: 0033:0x7fbc4cfcea9a [ 41.526528] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 [ 41.527942] RSP: 002b:00007ffcf54012a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 41.528593] RAX: ffffffffffffffda RBX: 00007ffcf5401368 RCX: 00007fbc4cfcea9a [ 41.529173] RDX: 000000000000002c RSI: 00007fbc4b9d9bd0 RDI: 0000000000000005 [ 41.529786] RBP: 00007fbc4bafb040 R08: 00007ffcf54013e0 R09: 000000000000000c [ 41.530375] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 41.530977] R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007fbc4ca85d1b [ 41.531573] Fixes: 5c578aedcb21d ("IPv6: convert addrconf hash list to RCU") Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jiri Benc Link: https://lore.kernel.org/r/8ab821e36073a4a406c50ec83c9e8dc586c539e4.1712585809.git.jbenc@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/addrconf.h | 4 ++++ net/ipv6/addrconf.c | 7 ++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 86eb2aba1479..5bcc63eade03 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -437,6 +437,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) refcount_inc(&ifp->refcnt); } +static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) +{ + return refcount_inc_not_zero(&ifp->refcnt); +} /* * compute link-local solicited-node multicast address diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1648373692a9..3866deaadbb6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2050,9 +2050,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - result = ifp; - in6_ifa_hold(ifp); - break; + if (in6_ifa_hold_safe(ifp)) { + result = ifp; + break; + } } } } From 7bc65d23ba20dcd7ecc094a12c181e594e5eb315 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 15:41:52 -0400 Subject: [PATCH 248/553] Bluetooth: SCO: Fix not validating setsockopt user input [ Upstream commit 51eda36d33e43201e7a4fd35232e069b2c850b01 ] syzbot reported sco_sock_setsockopt() is copying data without checking user input length. BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline] BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline] BUG: KASAN: slab-out-of-bounds in sco_sock_setsockopt+0xc0b/0xf90 net/bluetooth/sco.c:893 Read of size 4 at addr ffff88805f7b15a3 by task syz-executor.5/12578 Fixes: ad10b1a48754 ("Bluetooth: Add Bluetooth socket voice option") Fixes: b96e9c671b05 ("Bluetooth: Add BT_DEFER_SETUP option to sco socket") Fixes: 00398e1d5183 ("Bluetooth: Add support for BT_PKT_STATUS CMSG data for SCO connections") Fixes: f6873401a608 ("Bluetooth: Allow setting of codec for HFP offload use case") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/bluetooth.h | 9 +++++++++ net/bluetooth/sco.c | 23 ++++++++++------------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bcc5a4cd2c17..5aaf7d7f3c6f 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -565,6 +565,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, return skb; } +static inline int bt_copy_from_sockptr(void *dst, size_t dst_size, + sockptr_t src, size_t src_size) +{ + if (dst_size > src_size) + return -EINVAL; + + return copy_from_sockptr(dst, src, dst_size); +} + int bt_to_errno(u16 code); __u8 bt_status(int err); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 6d4168cfeb56..2e9137c539a4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -831,7 +831,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; - int len, err = 0; + int err = 0; struct bt_voice voice; u32 opt; struct bt_codecs *codecs; @@ -850,10 +850,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -870,11 +869,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, voice.setting = sco_pi(sk)->setting; - len = min_t(unsigned int, sizeof(voice), optlen); - if (copy_from_sockptr(&voice, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&voice, sizeof(voice), optval, + optlen); + if (err) break; - } /* Explicitly check for these values */ if (voice.setting != BT_VOICE_TRANSPARENT && @@ -897,10 +895,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_PKT_STATUS: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS; @@ -941,9 +938,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(buffer, optval, optlen)) { + err = bt_copy_from_sockptr(buffer, optlen, optval, optlen); + if (err) { hci_dev_put(hdev); - err = -EFAULT; break; } From 9d42f373391211c7c8af66a3a316533a32b8a607 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 15:50:47 -0400 Subject: [PATCH 249/553] Bluetooth: L2CAP: Fix not validating setsockopt user input [ Upstream commit 4f3951242ace5efc7131932e2e01e6ac6baed846 ] Check user input length before copying data. Fixes: 33575df7be67 ("Bluetooth: move l2cap_sock_setsockopt() to l2cap_sock.c") Fixes: 3ee7b7cd8390 ("Bluetooth: Add BT_MODE socket option") Signed-off-by: Eric Dumazet Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_sock.c | 52 +++++++++++++++----------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 947ca580bb9a..4198ca66fbe1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -745,7 +745,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; - int len, err = 0; + int err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -772,11 +772,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_sockptr(&opts, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen); + if (err) break; - } if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) { err = -EINVAL; @@ -819,10 +817,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; case L2CAP_LM: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt & L2CAP_LM_FIPS) { err = -EINVAL; @@ -903,7 +900,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, struct bt_security sec; struct bt_power pwr; struct l2cap_conn *conn; - int len, err = 0; + int err = 0; u32 opt; u16 mtu; u8 mode; @@ -929,11 +926,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_sockptr(&sec, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); + if (err) break; - } if (sec.level < BT_SECURITY_LOW || sec.level > BT_SECURITY_FIPS) { @@ -978,10 +973,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) { set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -993,10 +987,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_FLUSHABLE: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_FLUSHABLE_ON) { err = -EINVAL; @@ -1028,11 +1021,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, pwr.force_active = BT_POWER_FORCE_ACTIVE_ON; - len = min_t(unsigned int, sizeof(pwr), optlen); - if (copy_from_sockptr(&pwr, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen); + if (err) break; - } if (pwr.force_active) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1041,10 +1032,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) { err = -EINVAL; @@ -1089,10 +1079,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mtu, optval, sizeof(u16))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) break; - } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && sk->sk_state == BT_CONNECTED) @@ -1120,10 +1109,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mode, optval, sizeof(u8))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen); + if (err) break; - } BT_DBG("mode %u", mode); From c760089aa98289b4b88a7ff5a62dd92845adf223 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Apr 2024 12:07:41 +0000 Subject: [PATCH 250/553] netfilter: complete validation of user input [ Upstream commit 65acf6e0501ac8880a4f73980d01b5d27648b956 ] In my recent commit, I missed that do_replace() handlers use copy_from_sockptr() (which I fixed), followed by unsafe copy_from_sockptr_offset() calls. In all functions, we can perform the @optlen validation before even calling xt_alloc_table_info() with the following check: if ((u64)optlen < (u64)tmp.size + sizeof(tmp)) return -EINVAL; Fixes: 0c83842df40f ("netfilter: validate user input for expected length") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20240409120741.3538135-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b150c9929b12..14365b20f1c5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1f365e28e316..a6208efcfccf 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1120,6 +1120,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1506,6 +1508,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 37a2b3301e42..b844e519da1b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1137,6 +1137,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; @@ -1515,6 +1517,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; + if ((u64)len < (u64)tmp.size + sizeof(tmp)) + return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; From 2e8dc5cffc844dacfa79f056dea88002312f253f Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 9 Apr 2024 22:08:12 +0300 Subject: [PATCH 251/553] net/mlx5: Properly link new fs rules into the tree [ Upstream commit 7c6782ad4911cbee874e85630226ed389ff2e453 ] Previously, add_rule_fg would only add newly created rules from the handle into the tree when they had a refcount of 1. On the other hand, create_flow_handle tries hard to find and reference already existing identical rules instead of creating new ones. These two behaviors can result in a situation where create_flow_handle 1) creates a new rule and references it, then 2) in a subsequent step during the same handle creation references it again, resulting in a rule with a refcount of 2 that is not linked into the tree, will have a NULL parent and root and will result in a crash when the flow group is deleted because del_sw_hw_rule, invoked on rule deletion, assumes node->parent is != NULL. This happened in the wild, due to another bug related to incorrect handling of duplicate pkt_reformat ids, which lead to the code in create_flow_handle incorrectly referencing a just-added rule in the same flow handle, resulting in the problem described above. Full details are at [1]. This patch changes add_rule_fg to add new rules without parents into the tree, properly initializing them and avoiding the crash. This makes it more consistent with how rules are added to an FTE in create_flow_handle. Fixes: 74491de93712 ("net/mlx5: Add multi dest support") Link: https://lore.kernel.org/netdev/ea5264d6-6b55-4449-a602-214c6f509c1e@163.com/T/#u [1] Signed-off-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240409190820.227554-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e6674118bc42..164e10b5f9b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1752,8 +1752,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } trace_mlx5_fs_set_fte(fte, false); + /* Link newly added rules into the tree. */ for (i = 0; i < handle->num_rules; i++) { - if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + if (!handle->rule[i]->node.parent) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } From ad26f26abd353113dea4e8d5ebadccdab9b61e76 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 9 Apr 2024 22:08:15 +0300 Subject: [PATCH 252/553] net/mlx5e: Fix mlx5e_priv_init() cleanup flow [ Upstream commit ecb829459a841198e142f72fadab56424ae96519 ] When mlx5e_priv_init() fails, the cleanup flow calls mlx5e_selq_cleanup which calls mlx5e_selq_apply() that assures that the `priv->state_lock` is held using lockdep_is_held(). Acquire the state_lock in mlx5e_selq_cleanup(). Kernel log: ============================= WARNING: suspicious RCU usage 6.8.0-rc3_net_next_841a9b5 #1 Not tainted ----------------------------- drivers/net/ethernet/mellanox/mlx5/core/en/selq.c:124 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by systemd-modules/293: #0: ffffffffa05067b0 (devices_rwsem){++++}-{3:3}, at: ib_register_client+0x109/0x1b0 [ib_core] #1: ffff8881096c65c0 (&device->client_data_rwsem){++++}-{3:3}, at: add_client_context+0x104/0x1c0 [ib_core] stack backtrace: CPU: 4 PID: 293 Comm: systemd-modules Not tainted 6.8.0-rc3_net_next_841a9b5 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x8a/0xa0 lockdep_rcu_suspicious+0x154/0x1a0 mlx5e_selq_apply+0x94/0xa0 [mlx5_core] mlx5e_selq_cleanup+0x3a/0x60 [mlx5_core] mlx5e_priv_init+0x2be/0x2f0 [mlx5_core] mlx5_rdma_setup_rn+0x7c/0x1a0 [mlx5_core] rdma_init_netdev+0x4e/0x80 [ib_core] ? mlx5_rdma_netdev_free+0x70/0x70 [mlx5_core] ipoib_intf_init+0x64/0x550 [ib_ipoib] ipoib_intf_alloc+0x4e/0xc0 [ib_ipoib] ipoib_add_one+0xb0/0x360 [ib_ipoib] add_client_context+0x112/0x1c0 [ib_core] ib_register_client+0x166/0x1b0 [ib_core] ? 0xffffffffa0573000 ipoib_init_module+0xeb/0x1a0 [ib_ipoib] do_one_initcall+0x61/0x250 do_init_module+0x8a/0x270 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x17d/0x230 __x64_sys_finit_module+0x61/0xb0 do_syscall_64+0x71/0x140 entry_SYSCALL_64_after_hwframe+0x46/0x4e Fixes: 8bf30be75069 ("net/mlx5e: Introduce select queue parameters") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240409190820.227554-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/selq.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c index f675b1926340..f66bbc846464 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) void mlx5e_selq_cleanup(struct mlx5e_selq *selq) { + mutex_lock(selq->state_lock); WARN_ON_ONCE(selq->is_prepared); kvfree(selq->standby); @@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq) kvfree(selq->standby); selq->standby = NULL; + mutex_unlock(selq->state_lock); } void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9910a0480f58..e7d396434da3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5578,9 +5578,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) kfree(priv->tx_rates); kfree(priv->txq2sq); destroy_workqueue(priv->wq); - mutex_lock(&priv->state_lock); mlx5e_selq_cleanup(&priv->selq); - mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask); for (i = 0; i < priv->htb_max_qos_sqs; i++) From 88a50c8a504823773373dd116c43ba0b93639790 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 9 Apr 2024 22:08:16 +0300 Subject: [PATCH 253/553] net/mlx5e: HTB, Fix inconsistencies with QoS SQs number [ Upstream commit 2f436f1869771d46e1a9f85738d5a1a7c5653a4e ] When creating a new HTB class while the interface is down, the variable that follows the number of QoS SQs (htb_max_qos_sqs) may not be consistent with the number of HTB classes. Previously, we compared these two values to ensure that the node_qid is lower than the number of QoS SQs, and we allocated stats for that SQ when they are equal. Change the check to compare the node_qid with the current number of leaf nodes and fix the checking conditions to ensure allocation of stats_list and stats for each node. Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240409190820.227554-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en/qos.c | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 2842195ee548..1e887d640cff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -82,24 +82,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, txq_ix = mlx5e_qid_from_qos(chs, node_qid); - WARN_ON(node_qid > priv->htb_max_qos_sqs); - if (node_qid == priv->htb_max_qos_sqs) { - struct mlx5e_sq_stats *stats, **stats_list = NULL; + WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb)); + if (!priv->htb_qos_sq_stats) { + struct mlx5e_sq_stats **stats_list; - if (priv->htb_max_qos_sqs == 0) { - stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), - sizeof(*stats_list), - GFP_KERNEL); - if (!stats_list) - return -ENOMEM; - } - stats = kzalloc(sizeof(*stats), GFP_KERNEL); - if (!stats) { - kvfree(stats_list); + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), GFP_KERNEL); + if (!stats_list) return -ENOMEM; - } - if (stats_list) - WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + + WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + } + + if (!priv->htb_qos_sq_stats[node_qid]) { + struct mlx5e_sq_stats *stats; + + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats); /* Order htb_max_qos_sqs increment after writing the array pointer. * Pairs with smp_load_acquire in en_stats.c. From 8edb087c44a43d2404276e9efc274f007da70166 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Tue, 9 Apr 2024 12:41:59 +0200 Subject: [PATCH 254/553] net: sparx5: fix wrong config being used when reconfiguring PCS [ Upstream commit 33623113a48ea906f1955cbf71094f6aa4462e8f ] The wrong port config is being used if the PCS is reconfigured. Fix this by correctly using the new config instead of the old one. Fixes: 946e7fd5053a ("net: sparx5: add port module support") Signed-off-by: Daniel Machon Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409-link-mode-reconfiguration-fix-v2-1-db6a507f3627@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 32709d21ab2f..212bf6f4ed72 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -730,7 +730,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5, bool sgmii = false, inband_aneg = false; int err; - if (port->conf.inband) { + if (conf->inband) { if (conf->portmode == PHY_INTERFACE_MODE_SGMII || conf->portmode == PHY_INTERFACE_MODE_QSGMII) inband_aneg = true; /* Cisco-SGMII in-band-aneg */ @@ -947,7 +947,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5, if (err) return -EINVAL; - if (port->conf.inband) { + if (conf->inband) { /* Enable/disable 1G counters in ASM */ spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev), ASM_PORT_CFG_CSC_STAT_DIS, From 19643bf8c9b5bb5eea5163bf2f6a3eee6fb5b99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Tue, 9 Apr 2024 18:01:14 +0300 Subject: [PATCH 255/553] net: dsa: mt7530: trap link-local frames regardless of ST Port State MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 17c560113231ddc20088553c7b499b289b664311 ] In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) of the Open Systems Interconnection basic reference model (OSI/RM) are described; the medium access control (MAC) and logical link control (LLC) sublayers. The MAC sublayer is the one facing the physical layer. In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A Bridge component comprises a MAC Relay Entity for interconnecting the Ports of the Bridge, at least two Ports, and higher layer entities with at least a Spanning Tree Protocol Entity included. Each Bridge Port also functions as an end station and shall provide the MAC Service to an LLC Entity. Each instance of the MAC Service is provided to a distinct LLC Entity that supports protocol identification, multiplexing, and demultiplexing, for protocol data unit (PDU) transmission and reception by one or more higher layer entities. It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC Entity associated with each Bridge Port is modeled as being directly connected to the attached Local Area Network (LAN). On the switch with CPU port architecture, CPU port functions as Management Port, and the Management Port functionality is provided by software which functions as an end station. Software is connected to an IEEE 802 LAN that is wholly contained within the system that incorporates the Bridge. Software provides access to the LLC Entity associated with each Bridge Port by the value of the source port field on the special tag on the frame received by software. We call frames that carry control information to determine the active topology and current extent of each Virtual Local Area Network (VLAN), i.e., spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration Protocol Data Units (MVRPDUs), and frames from other link constrained protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and Link Layer Discovery Protocol (LLDP), link-local frames. They are not forwarded by a Bridge. Permanently configured entries in the filtering database (FDB) ensure that such frames are discarded by the Forwarding Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: Each of the reserved MAC addresses specified in Table 8-1 (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be permanently configured in the FDB in C-VLAN components and ERs. Each of the reserved MAC addresses specified in Table 8-2 (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently configured in the FDB in S-VLAN components. Each of the reserved MAC addresses specified in Table 8-3 (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in TPMR components. The FDB entries for reserved MAC addresses shall specify filtering for all Bridge Ports and all VIDs. Management shall not provide the capability to modify or remove entries for reserved MAC addresses. The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of propagation of PDUs within a Bridged Network, as follows: The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. PDUs transmitted using this destination address, or any other addresses that appear in Table 8-1, Table 8-2, and Table 8-3 (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can therefore travel no further than those stations that can be reached via a single individual LAN from the originating station. The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an address that no conformant S-VLAN component, C-VLAN component, or MAC Bridge can forward; however, this address is relayed by a TPMR component. PDUs using this destination address, or any of the other addresses that appear in both Table 8-1 and Table 8-2 but not in Table 8-3 (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by any TPMRs but will propagate no further than the nearest S-VLAN component, C-VLAN component, or MAC Bridge. The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address that no conformant C-VLAN component, MAC Bridge can forward; however, it is relayed by TPMR components and S-VLAN components. PDUs using this destination address, or any of the other addresses that appear in Table 8-1 but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), will be relayed by TPMR components and S-VLAN components but will propagate no further than the nearest C-VLAN component or MAC Bridge. Because the LLC Entity associated with each Bridge Port is provided via CPU port, we must not filter these frames but forward them to CPU port. In a Bridge, the transmission Port is majorly decided by ingress and egress rules, FDB, and spanning tree Port State functions of the Forwarding Process. For link-local frames, only CPU port should be designated as destination port in the FDB, and the other functions of the Forwarding Process must not interfere with the decision of the transmission Port. We call this process trapping frames to CPU port. Therefore, on the switch with CPU port architecture, link-local frames must be trapped to CPU port, and certain link-local frames received by a Port of a Bridge comprising a TPMR component or an S-VLAN component must be excluded from it. A Bridge of the switch with CPU port architecture cannot comprise a Two-Port MAC Relay (TPMR) component as a TPMR component supports only a subset of the functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port doesn't count) of this architecture will either function as a standard MAC Bridge or a standard VLAN Bridge. Therefore, a Bridge of this architecture can only comprise S-VLAN components, C-VLAN components, or MAC Bridge components. Since there's no TPMR component, we don't need to relay PDUs using the destination addresses specified on the Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge section where they must be relayed by TPMR components. One option to trap link-local frames to CPU port is to add static FDB entries with CPU port designated as destination port. However, because that Independent VLAN Learning (IVL) is being used on every VID, each entry only applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC Bridge component or a C-VLAN component, there would have to be 16 times 4096 entries. This switch intellectual property can only hold a maximum of 2048 entries. Using this option, there also isn't a mechanism to prevent link-local frames from being discarded when the spanning tree Port State of the reception Port is discarding. The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 registers. Whilst this applies to every VID, it doesn't contain all of the reserved MAC addresses without affecting the remaining Standard Group MAC Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF destination addresses which may be relayed by MAC Bridges or VLAN Bridges. The latter option provides better but not complete conformance. This switch intellectual property also does not provide a mechanism to trap link-local frames with specific destination addresses to CPU port by Bridge, to conform to the filtering rules for the distinct Bridge components. Therefore, regardless of the type of the Bridge component, link-local frames with these destination addresses will be trapped to CPU port: 01-80-C2-00-00-[00,01,02,03,0E] In a Bridge comprising a MAC Bridge component or a C-VLAN component: Link-local frames with these destination addresses won't be trapped to CPU port which won't conform to IEEE Std 802.1Q-2022: 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] In a Bridge comprising an S-VLAN component: Link-local frames with these destination addresses will be trapped to CPU port which won't conform to IEEE Std 802.1Q-2022: 01-80-C2-00-00-00 Link-local frames with these destination addresses won't be trapped to CPU port which won't conform to IEEE Std 802.1Q-2022: 01-80-C2-00-00-[04,05,06,07,08,09,0A] Currently on this switch intellectual property, if the spanning tree Port State of the reception Port is discarding, link-local frames will be discarded. To trap link-local frames regardless of the spanning tree Port State, make the switch regard them as Bridge Protocol Data Units (BPDUs). This switch intellectual property only lets the frames regarded as BPDUs bypass the spanning tree Port State function of the Forwarding Process. With this change, the only remaining interference is the ingress rules. When the reception Port has no PVID assigned on software, VLAN-untagged frames won't be allowed in. There doesn't seem to be a mechanism on the switch intellectual property to have link-local frames bypass this function of the Forwarding Process. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Reviewed-by: Daniel Golle Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20240409-b4-for-net-mt7530-fix-link-local-when-stp-discarding-v2-1-07b1150164ac@arinc9.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 229 +++++++++++++++++++++++++++++++++------ drivers/net/dsa/mt7530.h | 5 + 2 files changed, 200 insertions(+), 34 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 07065c1af55e..d4515c19a5f3 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -998,20 +998,173 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } -/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std - * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA - * must only be propagated to C-VLAN and MAC Bridge components. That means - * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports, - * these frames are supposed to be processed by the CPU (software). So we make - * the switch only forward them to the CPU port. And if received from a CPU - * port, forward to a single port. The software is responsible of making the - * switch conform to the latter by setting a single port as destination port on - * the special tag. +/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL) + * of the Open Systems Interconnection basic reference model (OSI/RM) are + * described; the medium access control (MAC) and logical link control (LLC) + * sublayers. The MAC sublayer is the one facing the physical layer. * - * This switch intellectual property cannot conform to this part of the standard - * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC - * DAs, it also includes :22-FF which the scope of propagation is not supposed - * to be restricted for these MAC DAs. + * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A + * Bridge component comprises a MAC Relay Entity for interconnecting the Ports + * of the Bridge, at least two Ports, and higher layer entities with at least a + * Spanning Tree Protocol Entity included. + * + * Each Bridge Port also functions as an end station and shall provide the MAC + * Service to an LLC Entity. Each instance of the MAC Service is provided to a + * distinct LLC Entity that supports protocol identification, multiplexing, and + * demultiplexing, for protocol data unit (PDU) transmission and reception by + * one or more higher layer entities. + * + * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC + * Entity associated with each Bridge Port is modeled as being directly + * connected to the attached Local Area Network (LAN). + * + * On the switch with CPU port architecture, CPU port functions as Management + * Port, and the Management Port functionality is provided by software which + * functions as an end station. Software is connected to an IEEE 802 LAN that is + * wholly contained within the system that incorporates the Bridge. Software + * provides access to the LLC Entity associated with each Bridge Port by the + * value of the source port field on the special tag on the frame received by + * software. + * + * We call frames that carry control information to determine the active + * topology and current extent of each Virtual Local Area Network (VLAN), i.e., + * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration + * Protocol Data Units (MVRPDUs), and frames from other link constrained + * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and + * Link Layer Discovery Protocol (LLDP), link-local frames. They are not + * forwarded by a Bridge. Permanently configured entries in the filtering + * database (FDB) ensure that such frames are discarded by the Forwarding + * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail: + * + * Each of the reserved MAC addresses specified in Table 8-1 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be + * permanently configured in the FDB in C-VLAN components and ERs. + * + * Each of the reserved MAC addresses specified in Table 8-2 + * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently + * configured in the FDB in S-VLAN components. + * + * Each of the reserved MAC addresses specified in Table 8-3 + * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in + * TPMR components. + * + * The FDB entries for reserved MAC addresses shall specify filtering for all + * Bridge Ports and all VIDs. Management shall not provide the capability to + * modify or remove entries for reserved MAC addresses. + * + * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of + * propagation of PDUs within a Bridged Network, as follows: + * + * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no + * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN) + * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward. + * PDUs transmitted using this destination address, or any other addresses + * that appear in Table 8-1, Table 8-2, and Table 8-3 + * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can + * therefore travel no further than those stations that can be reached via a + * single individual LAN from the originating station. + * + * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an + * address that no conformant S-VLAN component, C-VLAN component, or MAC + * Bridge can forward; however, this address is relayed by a TPMR component. + * PDUs using this destination address, or any of the other addresses that + * appear in both Table 8-1 and Table 8-2 but not in Table 8-3 + * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by + * any TPMRs but will propagate no further than the nearest S-VLAN component, + * C-VLAN component, or MAC Bridge. + * + * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address + * that no conformant C-VLAN component, MAC Bridge can forward; however, it is + * relayed by TPMR components and S-VLAN components. PDUs using this + * destination address, or any of the other addresses that appear in Table 8-1 + * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]), + * will be relayed by TPMR components and S-VLAN components but will propagate + * no further than the nearest C-VLAN component or MAC Bridge. + * + * Because the LLC Entity associated with each Bridge Port is provided via CPU + * port, we must not filter these frames but forward them to CPU port. + * + * In a Bridge, the transmission Port is majorly decided by ingress and egress + * rules, FDB, and spanning tree Port State functions of the Forwarding Process. + * For link-local frames, only CPU port should be designated as destination port + * in the FDB, and the other functions of the Forwarding Process must not + * interfere with the decision of the transmission Port. We call this process + * trapping frames to CPU port. + * + * Therefore, on the switch with CPU port architecture, link-local frames must + * be trapped to CPU port, and certain link-local frames received by a Port of a + * Bridge comprising a TPMR component or an S-VLAN component must be excluded + * from it. + * + * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port + * MAC Relay (TPMR) component as a TPMR component supports only a subset of the + * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port + * doesn't count) of this architecture will either function as a standard MAC + * Bridge or a standard VLAN Bridge. + * + * Therefore, a Bridge of this architecture can only comprise S-VLAN components, + * C-VLAN components, or MAC Bridge components. Since there's no TPMR component, + * we don't need to relay PDUs using the destination addresses specified on the + * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge + * section where they must be relayed by TPMR components. + * + * One option to trap link-local frames to CPU port is to add static FDB entries + * with CPU port designated as destination port. However, because that + * Independent VLAN Learning (IVL) is being used on every VID, each entry only + * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC + * Bridge component or a C-VLAN component, there would have to be 16 times 4096 + * entries. This switch intellectual property can only hold a maximum of 2048 + * entries. Using this option, there also isn't a mechanism to prevent + * link-local frames from being discarded when the spanning tree Port State of + * the reception Port is discarding. + * + * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4 + * registers. Whilst this applies to every VID, it doesn't contain all of the + * reserved MAC addresses without affecting the remaining Standard Group MAC + * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the + * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination + * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF + * destination addresses which may be relayed by MAC Bridges or VLAN Bridges. + * The latter option provides better but not complete conformance. + * + * This switch intellectual property also does not provide a mechanism to trap + * link-local frames with specific destination addresses to CPU port by Bridge, + * to conform to the filtering rules for the distinct Bridge components. + * + * Therefore, regardless of the type of the Bridge component, link-local frames + * with these destination addresses will be trapped to CPU port: + * + * 01-80-C2-00-00-[00,01,02,03,0E] + * + * In a Bridge comprising a MAC Bridge component or a C-VLAN component: + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] + * + * In a Bridge comprising an S-VLAN component: + * + * Link-local frames with these destination addresses will be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-00 + * + * Link-local frames with these destination addresses won't be trapped to CPU + * port which won't conform to IEEE Std 802.1Q-2022: + * + * 01-80-C2-00-00-[04,05,06,07,08,09,0A] + * + * To trap link-local frames to CPU port as conformant as this switch + * intellectual property can allow, link-local frames are made to be regarded as + * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual + * property only lets the frames regarded as BPDUs bypass the spanning tree Port + * State function of the Forwarding Process. + * + * The only remaining interference is the ingress rules. When the reception Port + * has no PVID assigned on software, VLAN-untagged frames won't be allowed in. + * There doesn't seem to be a mechanism on the switch intellectual property to + * have link-local frames bypass this function of the Forwarding Process. */ static void mt753x_trap_frames(struct mt7530_priv *priv) @@ -1019,35 +1172,43 @@ mt753x_trap_frames(struct mt7530_priv *priv) /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them * VLAN-untagged. */ - mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK | - MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | - MT753X_BPDU_PORT_FW_MASK, - MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_BPC, + MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK | + MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK | + MT753X_BPDU_PORT_FW_MASK, + MT753X_PAE_BPDU_FR | + MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK | - MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK | - MT753X_R01_PORT_FW_MASK, - MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC1, + MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK | + MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK, + MT753X_R02_BPDU_FR | + MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R01_BPDU_FR | + MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress * them VLAN-untagged. */ - mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK | - MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK | - MT753X_R03_PORT_FW_MASK, - MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | - MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | - MT753X_BPDU_CPU_ONLY); + mt7530_rmw(priv, MT753X_RGAC2, + MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK | + MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK, + MT753X_R0E_BPDU_FR | + MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) | + MT753X_R03_BPDU_FR | + MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + MT753X_BPDU_CPU_ONLY); } static int diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index fa2afa67ceb0..2d1ea390f05a 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -63,6 +63,7 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 +#define MT753X_PAE_BPDU_FR BIT(25) #define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22) #define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x) #define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) @@ -73,20 +74,24 @@ enum mt753x_id { /* Register for :01 and :02 MAC DA frame control */ #define MT753X_RGAC1 0x28 +#define MT753X_R02_BPDU_FR BIT(25) #define MT753X_R02_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x) #define MT753X_R02_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x) +#define MT753X_R01_BPDU_FR BIT(9) #define MT753X_R01_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x) #define MT753X_R01_PORT_FW_MASK GENMASK(2, 0) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c +#define MT753X_R0E_BPDU_FR BIT(25) #define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22) #define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x) #define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) #define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) +#define MT753X_R03_BPDU_FR BIT(9) #define MT753X_R03_EG_TAG_MASK GENMASK(8, 6) #define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x) #define MT753X_R03_PORT_FW_MASK GENMASK(2, 0) From fb6d14e23d489e338d4ddef1f013d5599e7722de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: [PATCH 256/553] af_unix: Do not use atomic ops for unix_sk(sk)->inflight. [ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ] When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Signed-off-by: Sasha Levin --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 17 ++++++++--------- net/unix/scm.c | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0920b669b9b3..16d6936baa2f 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -54,7 +54,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 373530303ad1..0a75d76535f7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -968,11 +968,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9bfffe2a7f02..7b326582d97d 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -237,14 +238,12 @@ void unix_gc(void) */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; - long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); @@ -271,7 +270,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, ¬_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index d1048b4c2baa..4eff7da9f6f9 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -52,12 +52,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -74,10 +75,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); From b75722be422c276b699200de90527d01c602ea7c Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 9 Apr 2024 22:09:39 +0200 Subject: [PATCH 257/553] af_unix: Fix garbage collector racing against connect() [ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ] Garbage collector does not take into account the risk of embryo getting enqueued during the garbage collection. If such embryo has a peer that carries SCM_RIGHTS, two consecutive passes of scan_children() may see a different set of children. Leading to an incorrectly elevated inflight count, and then a dangling pointer within the gc_inflight_list. sockets are AF_UNIX/SOCK_STREAM S is an unconnected socket L is a listening in-flight socket bound to addr, not in fdtable V's fd will be passed via sendmsg(), gets inflight count bumped connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc() ---------------- ------------------------- ----------- NS = unix_create1() skb1 = sock_wmalloc(NS) L = unix_find_other(addr) unix_state_lock(L) unix_peer(S) = NS // V count=1 inflight=0 NS = unix_peer(S) skb2 = sock_alloc() skb_queue_tail(NS, skb2[V]) // V became in-flight // V count=2 inflight=1 close(V) // V count=1 inflight=1 // GC candidate condition met for u in gc_inflight_list: if (total_refs == inflight_refs) add u to gc_candidates // gc_candidates={L, V} for u in gc_candidates: scan_children(u, dec_inflight) // embryo (skb1) was not // reachable from L yet, so V's // inflight remains unchanged __skb_queue_tail(L, skb1) unix_state_unlock(L) for u in gc_candidates: if (u.inflight) scan_children(u, inc_inflight_move_tail) // V count=1 inflight=2 (!) If there is a GC-candidate listening socket, lock/unlock its state. This makes GC wait until the end of any ongoing connect() to that socket. After flipping the lock, a possibly SCM-laden embryo is already enqueued. And if there is another embryo coming, it can not possibly carry SCM_RIGHTS. At this point, unix_inflight() can not happen because unix_gc_lock is already taken. Inflight graph remains unaffected. Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.") Signed-off-by: Michal Luczaj Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/garbage.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 7b326582d97d..85c6f05c0fa3 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -235,11 +235,22 @@ void unix_gc(void) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - total_refs = file_count(u->sk.sk_socket->file); + total_refs = file_count(sk->sk_socket->file); BUG_ON(!u->inflight); BUG_ON(total_refs < u->inflight); @@ -247,6 +258,11 @@ void unix_gc(void) list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock(sk); + unix_state_unlock(sk); + } } } From 4dea83d483d574645763440aaf50f186a17bfbd6 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Wed, 10 Apr 2024 09:13:55 +0000 Subject: [PATCH 258/553] net: ena: Fix potential sign extension issue [ Upstream commit 713a85195aad25d8a26786a37b674e3e5ec09e3c ] Small unsigned types are promoted to larger signed types in the case of multiplication, the result of which may overflow. In case the result of such a multiplication has its MSB turned on, it will be sign extended with '1's. This changes the multiplication result. Code example of the phenomenon: ------------------------------- u16 x, y; size_t z1, z2; x = y = 0xffff; printk("x=%x y=%x\n",x,y); z1 = x*y; z2 = (size_t)x*y; printk("z1=%lx z2=%lx\n", z1, z2); Output: ------- x=ffff y=ffff z1=fffffffffffe0001 z2=fffe0001 The expected result of ffff*ffff is fffe0001, and without the explicit casting to avoid the unwanted sign extension we got fffffffffffe0001. This commit adds an explicit casting to avoid the sign extension issue. Fixes: 689b2bdaaa14 ("net: ena: add functions for handling Low Latency Queues in ena_com") Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Reviewed-by: Shannon Nelson Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 633b321d7fdd..4db689372980 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -362,7 +362,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, ENA_COM_BOUNCE_BUFFER_CNTRL_CNT; io_sq->bounce_buf_ctrl.next_to_use = 0; - size = io_sq->bounce_buf_ctrl.buffer_size * + size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * io_sq->bounce_buf_ctrl.buffers_num; dev_node = dev_to_node(ena_dev->dmadev); From 7d44e12efb7d777484feea2818d99d68224b3180 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Wed, 10 Apr 2024 09:13:56 +0000 Subject: [PATCH 259/553] net: ena: Wrong missing IO completions check order [ Upstream commit f7e417180665234fdb7af2ebe33d89aaa434d16f ] Missing IO completions check is called every second (HZ jiffies). This commit fixes several issues with this check: 1. Duplicate queues check: Max of 4 queues are scanned on each check due to monitor budget. Once reaching the budget, this check exits under the assumption that the next check will continue to scan the remainder of the queues, but in practice, next check will first scan the last already scanned queue which is not necessary and may cause the full queue scan to last a couple of seconds longer. The fix is to start every check with the next queue to scan. For example, on 8 IO queues: Bug: [0,1,2,3], [3,4,5,6], [6,7] Fix: [0,1,2,3], [4,5,6,7] 2. Unbalanced queues check: In case the number of active IO queues is not a multiple of budget, there will be checks which don't utilize the full budget because the full scan exits when reaching the last queue id. The fix is to run every TX completion check with exact queue budget regardless of the queue id. For example, on 7 IO queues: Bug: [0,1,2,3], [4,5,6], [0,1,2,3] Fix: [0,1,2,3], [4,5,6,0], [1,2,3,4] The budget may be lowered in case the number of IO queues is less than the budget (4) to make sure there are no duplicate queues on the same check. For example, on 3 IO queues: Bug: [0,1,2,0], [1,2,0,1] Fix: [0,1,2], [0,1,2] Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Amit Bernstein Signed-off-by: David Arinzon Reviewed-by: Shannon Nelson Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 9e82e7b9c3b7..b2eb6e1958f0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3797,10 +3797,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_ring *rx_ring; - int i, budget, rc; + int qid, budget, rc; int io_queue_count; io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; + /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -3813,27 +3814,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) return; - budget = ENA_MONITORED_TX_QUEUES; + budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES); - for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { - tx_ring = &adapter->tx_ring[i]; - rx_ring = &adapter->rx_ring[i]; + qid = adapter->last_monitored_tx_qid; + + while (budget) { + qid = (qid + 1) % io_queue_count; + + tx_ring = &adapter->tx_ring[qid]; + rx_ring = &adapter->rx_ring[qid]; rc = check_missing_comp_in_tx_queue(adapter, tx_ring); if (unlikely(rc)) return; - rc = !ENA_IS_XDP_INDEX(adapter, i) ? + rc = !ENA_IS_XDP_INDEX(adapter, qid) ? check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; budget--; - if (!budget) - break; } - adapter->last_monitored_tx_qid = i % io_queue_count; + adapter->last_monitored_tx_qid = qid; } /* trigger napi schedule after 2 consecutive detections */ From 19ff8fed3338898b70b2aad831386c78564912e1 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Wed, 10 Apr 2024 09:13:57 +0000 Subject: [PATCH 260/553] net: ena: Fix incorrect descriptor free behavior [ Upstream commit bf02d9fe00632d22fa91d34749c7aacf397b6cde ] ENA has two types of TX queues: - queues which only process TX packets arriving from the network stack - queues which only process TX packets forwarded to it by XDP_REDIRECT or XDP_TX instructions The ena_free_tx_bufs() cycles through all descriptors in a TX queue and unmaps + frees every descriptor that hasn't been acknowledged yet by the device (uncompleted TX transactions). The function assumes that the processed TX queue is necessarily from the first category listed above and ends up using napi_consume_skb() for descriptors belonging to an XDP specific queue. This patch solves a bug in which, in case of a VF reset, the descriptors aren't freed correctly, leading to crashes. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Reviewed-by: Shannon Nelson Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b2eb6e1958f0..5e37b18ac3ad 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1203,8 +1203,11 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, static void ena_free_tx_bufs(struct ena_ring *tx_ring) { bool print_once = true; + bool is_xdp_ring; u32 i; + is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid); + for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -1224,10 +1227,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) ena_unmap_tx_buff(tx_ring, tx_info); - dev_kfree_skb_any(tx_info->skb); + if (is_xdp_ring) + xdp_return_frame(tx_info->xdpf); + else + dev_kfree_skb_any(tx_info->skb); } - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->qid)); + + if (!is_xdp_ring) + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); } static void ena_free_all_tx_bufs(struct ena_adapter *adapter) From 91580ea48b6dc32c236eecbdcb64ecd20fea3f6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:24 +0200 Subject: [PATCH 261/553] tracing: hide unused ftrace_event_id_fops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5281ec83454d70d98b71f1836fb16512566c01cd ] When CONFIG_PERF_EVENTS, a 'make W=1' build produces a warning about the unused ftrace_event_id_fops variable: kernel/trace/trace_events.c:2155:37: error: 'ftrace_event_id_fops' defined but not used [-Werror=unused-const-variable=] 2155 | static const struct file_operations ftrace_event_id_fops = { Hide this in the same #ifdef as the reference to it. Link: https://lore.kernel.org/linux-trace-kernel/20240403080702.3509288-7-arnd@kernel.org Cc: Masami Hiramatsu Cc: Oleg Nesterov Cc: Mathieu Desnoyers Cc: Zheng Yejian Cc: Kees Cook Cc: Ajay Kaher Cc: Jinjie Ruan Cc: Clément Léger Cc: Dan Carpenter Cc: "Tzvetomir Stoyanov (VMware)" Fixes: 620a30e97feb ("tracing: Don't pass file_operations array to event_create_dir()") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_events.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a6d2f99f847d..24859d964505 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1669,6 +1669,7 @@ static int trace_format_open(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PERF_EVENTS static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1683,6 +1684,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } +#endif static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, @@ -2127,10 +2129,12 @@ static const struct file_operations ftrace_event_format_fops = { .release = seq_release, }; +#ifdef CONFIG_PERF_EVENTS static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; +#endif static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_file_tr, From 5f1205b86bd0419beb246156945b9e6ac3b43afa Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 11 Apr 2024 11:07:43 +0800 Subject: [PATCH 262/553] iommu/vt-d: Allocate local memory for page request queue [ Upstream commit a34f3e20ddff02c4f12df2c0635367394e64c63d ] The page request queue is per IOMMU, its allocation should be made NUMA-aware for performance reasons. Fixes: a222a7f0bb6c ("iommu/vt-d: Implement page request handling") Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240403214007.985600-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 03b25358946c..cb862ab96873 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -71,7 +71,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); From cb3131b5a204dc8882dff3abe99f8618292a324b Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 19 Mar 2024 10:54:22 -0700 Subject: [PATCH 263/553] btrfs: qgroup: correctly model root qgroup rsv in convert commit 141fb8cd206ace23c02cd2791c6da52c1d77d42a upstream. We use add_root_meta_rsv and sub_root_meta_rsv to track prealloc and pertrans reservations for subvolumes when quotas are enabled. The convert function does not properly increment pertrans after decrementing prealloc, so the count is not accurate. Note: we check that the fs is not read-only to mirror the logic in qgroup_convert_meta, which checks that before adding to the pertrans rsv. Fixes: 8287475a2055 ("btrfs: qgroup: Use root::qgroup_meta_rsv_* to record qgroup meta reserved space") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c14d4f70e84b..80ca7b435b0d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4154,6 +4154,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) BTRFS_QGROUP_RSV_META_PREALLOC); trace_qgroup_meta_convert(root, num_bytes); qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); + if (!sb_rdonly(fs_info->sb)) + add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* From 06fe99985427385b60fbb933cc6fb668b82b5453 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Mar 2024 10:14:24 -0700 Subject: [PATCH 264/553] btrfs: record delayed inode root in transaction commit 71537e35c324ea6fbd68377a4f26bb93a831ae35 upstream. When running delayed inode updates, we do not record the inode's root in the transaction, but we do allocate PREALLOC and thus converted PERTRANS space for it. To be sure we free that PERTRANS meta rsv, we must ensure that we record the root in the transaction. Fixes: 4f5427ccce5d ("btrfs: delayed-inode: Use new qgroup meta rsv for delayed inode and item") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c6426080cf0a..1494ce990d29 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1115,6 +1115,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_record_root_in_trans(trans, node->root); + if (ret) + return ret; ret = btrfs_update_delayed_inode(trans, node->root, path, node); return ret; } From c00146b399a51b5ccd4a960aebbbcd88edff34b7 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Mar 2024 10:18:39 -0700 Subject: [PATCH 265/553] btrfs: qgroup: convert PREALLOC to PERTRANS after record_root_in_trans commit 211de93367304ab395357f8cb12568a4d1e20701 upstream. The transaction is only able to free PERTRANS reservations for a root once that root has been recorded with the TRANS tag on the roots radix tree. Therefore, until we are sure that this root will get tagged, it isn't safe to convert. Generally, this is not an issue as *some* transaction will likely tag the root before long and this reservation will get freed in that transaction, but technically it could stick around until unmount and result in a warning about leaked metadata reservation space. This path is most exercised by running the generic/269 fstest with CONFIG_BTRFS_DEBUG. Fixes: a6496849671a ("btrfs: fix start transaction qgroup rsv double free") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b172091f4261..5549c843f0d3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -700,14 +700,6 @@ again: h->reloc_reserved = reloc_reserved; } - /* - * Now that we have found a transaction to be a part of, convert the - * qgroup reservation from prealloc to pertrans. A different transaction - * can't race in and free our pertrans out from under us. - */ - if (qgroup_reserved) - btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); - got_it: if (!current->journal_info) current->journal_info = h; @@ -741,8 +733,15 @@ got_it: * not just freed. */ btrfs_end_transaction(h); - return ERR_PTR(ret); + goto reserve_fail; } + /* + * Now that we have found a transaction to be a part of, convert the + * qgroup reservation from prealloc to pertrans. A different transaction + * can't race in and free our pertrans out from under us. + */ + if (qgroup_reserved) + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); return h; From 88dd8bb129fca723c036c27803072d369254e5bb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 8 Apr 2024 18:11:09 +0100 Subject: [PATCH 266/553] io_uring/net: restore msg_control on sendzc retry commit 4fe82aedeb8a8cb09bfa60f55ab57b5c10a74ac4 upstream. cac9e4418f4cb ("io_uring/net: save msghdr->msg_control for retries") reinstatiates msg_control before every __sys_sendmsg_sock(), since the function can overwrite the value in msghdr. We need to do same for zerocopy sendmsg. Cc: stable@vger.kernel.org Fixes: 493108d95f146 ("io_uring/net: zerocopy sendmsg") Link: https://github.com/axboe/liburing/issues/1067 Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/cc1d5d9df0576fa66ddad4420d240a98a020b267.1712596179.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index b1b564c04d1e..48404bd33001 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1229,6 +1229,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; + kmsg->msg.msg_control_user = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) From 62029bc9ff2c17a4e3a2478d83418ec575413808 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 10 Apr 2024 09:58:02 +0800 Subject: [PATCH 267/553] kprobes: Fix possible use-after-free issue on kprobe registration commit 325f3fb551f8cd672dbbfc4cf58b14f9ee3fc9e8 upstream. When unloading a module, its state is changing MODULE_STATE_LIVE -> MODULE_STATE_GOING -> MODULE_STATE_UNFORMED. Each change will take a time. `is_module_text_address()` and `__module_text_address()` works with MODULE_STATE_LIVE and MODULE_STATE_GOING. If we use `is_module_text_address()` and `__module_text_address()` separately, there is a chance that the first one is succeeded but the next one is failed because module->state becomes MODULE_STATE_UNFORMED between those operations. In `check_kprobe_address_safe()`, if the second `__module_text_address()` is failed, that is ignored because it expected a kernel_text address. But it may have failed simply because module->state has been changed to MODULE_STATE_UNFORMED. In this case, arm_kprobe() will try to modify non-exist module text address (use-after-free). To fix this problem, we should not use separated `is_module_text_address()` and `__module_text_address()`, but use only `__module_text_address()` once and do `try_module_get(module)` which is only available with MODULE_STATE_LIVE. Link: https://lore.kernel.org/all/20240410015802.265220-1-zhengyejian1@huawei.com/ Fixes: 28f6c37a2910 ("kprobes: Forbid probing on trampoline and BPF code areas") Cc: stable@vger.kernel.org Signed-off-by: Zheng Yejian Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index dbfddfa86c14..5b5ee060a2db 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p, jump_label_lock(); preempt_disable(); - /* Ensure it is not in reserved area nor out of text */ - if (!(core_kernel_text((unsigned long) p->addr) || - is_module_text_address((unsigned long) p->addr)) || - in_gate_area_no_mm((unsigned long) p->addr) || + /* Ensure the address is in a text area, and find a module if exists. */ + *probed_mod = NULL; + if (!core_kernel_text((unsigned long) p->addr)) { + *probed_mod = __module_text_address((unsigned long) p->addr); + if (!(*probed_mod)) { + ret = -EINVAL; + goto out; + } + } + /* Ensure it is not in reserved area. */ + if (in_gate_area_no_mm((unsigned long) p->addr) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || @@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if 'p' is probing a module. */ - *probed_mod = __module_text_address((unsigned long) p->addr); + /* Get module refcount and reject __init functions for loaded modules. */ if (*probed_mod) { /* * We must hold a refcount of the probed module while updating From 4b53d7d620c45c60501fb81c00fd2eb07aeb142f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Apr 2024 00:34:29 +0300 Subject: [PATCH 268/553] drm/i915/vrr: Disable VRR when using bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dcd8992e47f13afb5c11a61e8d9c141c35e23751 upstream. All joined pipes share the same transcoder/timing generator. Currently we just do the commits per-pipe, which doesn't really work if we need to change switch between non-VRR and VRR timings generators on the fly, or even when sending the push to the transcoder. For now just disable VRR when bigjoiner is needed. Cc: stable@vger.kernel.org Tested-by: Vidya Srinivas Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20240404213441.17637-6-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit f9d5e51db65652dbd8a2102fd7619440e3599fd2) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_vrr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5eac99021875..6615e4153f37 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -110,6 +110,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, if (!intel_vrr_is_capable(connector)) return; + /* + * FIXME all joined pipes share the same transcoder. + * Need to account for that during VRR toggle/push/etc. + */ + if (crtc_state->bigjoiner_pipes) + return; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return; From d29b50a32c274ae660d5e55eda747220a34217ef Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 26 Mar 2024 15:32:46 -0400 Subject: [PATCH 269/553] drm/amdkfd: Reset GPU on queue preemption failure commit 8bdfb4ea95ca738d33ef71376c21eba20130f2eb upstream. Currently, with F32 HWS GPU reset is only when unmap queue fails. However, if compute queue doesn't repond to preemption request in time unmap will return without any error. In this case, only preemption error is logged and Reset is not triggered. Call GPU reset in this case also. Reviewed-by: Alex Deucher Signed-off-by: Harish Kasiviswanathan Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0b87034d9dd5..1b7b29426480 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1805,6 +1805,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); while (halt_if_hws_hang) schedule(); + kfd_hws_hang(dqm); return -ETIME; } From 8a6fea3fcb577a543ef67683ca7105bde49a38fb Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Wed, 3 Apr 2024 17:02:46 +0800 Subject: [PATCH 270/553] drm/ast: Fix soft lockup commit bc004f5038220b1891ef4107134ccae44be55109 upstream. There is a while-loop in ast_dp_set_on_off() that could lead to infinite-loop. This is because the register, VGACRI-Dx, checked in this API is a scratch register actually controlled by a MCU, named DPMCU, in BMC. These scratch registers are protected by scu-lock. If suc-lock is not off, DPMCU can not update these registers and then host will have soft lockup due to never updated status. DPMCU is used to control DP and relative registers to handshake with host's VGA driver. Even the most time-consuming task, DP's link training, is less than 100ms. 200ms should be enough. Signed-off-by: Jammy Huang Fixes: 594e9c04b586 ("drm/ast: Create the driver for ASPEED proprietory Display-Port") Reviewed-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Cc: KuoHsiang Chou Cc: Thomas Zimmermann Cc: Dave Airlie Cc: Jocelyn Falempe Cc: dri-devel@lists.freedesktop.org Cc: # v5.19+ Link: https://patchwork.freedesktop.org/patch/msgid/20240403090246.1495487-1-jammy_huang@aspeedtech.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 56483860306b..a4a23b9623ad 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -190,6 +190,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on) { struct ast_private *ast = to_ast_private(dev); u8 video_on_off = on; + u32 i = 0; // Video On/Off ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on); @@ -202,6 +203,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on) ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) { // wait 1 ms mdelay(1); + if (++i > 200) + break; } } } From 18c8cc6680ce938d0458859b6a08b4d34f7d8055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 4 Apr 2024 23:33:25 +0300 Subject: [PATCH 271/553] drm/client: Fully protect modes[] with dev->mode_config.mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3eadd887dbac1df8f25f701e5d404d1b90fd0fea upstream. The modes[] array contains pointers to modes on the connectors' mode lists, which are protected by dev->mode_config.mutex. Thus we need to extend modes[] the same protection or by the time we use it the elements may already be pointing to freed/reused memory. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10583 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240404203336.10454-2-ville.syrjala@linux.intel.com Reviewed-by: Dmitry Baryshkov Reviewed-by: Jani Nikula Reviewed-by: Thomas Zimmermann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_client_modeset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 7847020de0a4..9a65806047b5 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -781,6 +781,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int total_modes_count = 0; struct drm_client_offset *offsets; unsigned int connector_count = 0; + /* points to modes protected by mode_config.mutex */ struct drm_display_mode **modes; struct drm_crtc **crtcs; int i, ret = 0; @@ -849,7 +850,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, drm_client_pick_crtcs(client, connectors, connector_count, crtcs, modes, 0, width, height); } - mutex_unlock(&dev->mode_config.mutex); drm_client_modeset_release(client); @@ -879,6 +879,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, modeset->y = offset->y; } } + mutex_unlock(&dev->mode_config.mutex); mutex_unlock(&client->modeset_mutex); out: From f6e2d61dc1598284eab80c8bec57523a4c088b88 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 28 Mar 2024 10:21:47 +1000 Subject: [PATCH 272/553] vhost: Add smp_rmb() in vhost_vq_avail_empty() commit 22e1992cf7b034db5325660e98c41ca5afa5f519 upstream. A smp_rmb() has been missed in vhost_vq_avail_empty(), spotted by Will. Otherwise, it's not ensured the available ring entries pushed by guest can be observed by vhost in time, leading to stale available ring entries fetched by vhost in vhost_get_vq_desc(), as reported by Yihuang Yu on NVidia's grace-hopper (ARM64) platform. /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ -accel kvm -machine virt,gic-version=host -cpu host \ -smp maxcpus=1,cpus=1,sockets=1,clusters=1,cores=1,threads=1 \ -m 4096M,slots=16,maxmem=64G \ -object memory-backend-ram,id=mem0,size=4096M \ : \ -netdev tap,id=vnet0,vhost=true \ -device virtio-net-pci,bus=pcie.8,netdev=vnet0,mac=52:54:00:f1:26:b0 : guest# netperf -H 10.26.1.81 -l 60 -C -c -t UDP_STREAM virtio_net virtio0: output.0:id 100 is not a head! Add the missed smp_rmb() in vhost_vq_avail_empty(). When tx_can_batch() returns true, it means there's still pending tx buffers. Since it might read indices, so it still can bypass the smp_rmb() in vhost_get_vq_desc(). Note that it should be safe until vq->avail_idx is changed by commit 275bf960ac697 ("vhost: better detection of available buffers"). Fixes: 275bf960ac69 ("vhost: better detection of available buffers") Cc: # v4.11+ Reported-by: Yihuang Yu Suggested-by: Will Deacon Signed-off-by: Gavin Shan Acked-by: Jason Wang Message-Id: <20240328002149.1141302-2-gshan@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 61c72e62abd4..c3f7a77d9fed 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2523,9 +2523,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) r = vhost_get_avail_idx(vq, &avail_idx); if (unlikely(r)) return false; - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - return vq->avail_idx == vq->last_avail_idx; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return false; + } + + return true; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); From a2c1c0cfab05421320f65f5a2a60d607a47653a1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 28 Mar 2024 10:21:48 +1000 Subject: [PATCH 273/553] vhost: Add smp_rmb() in vhost_enable_notify() commit df9ace7647d4123209395bb9967e998d5758c645 upstream. A smp_rmb() has been missed in vhost_enable_notify(), inspired by Will. Otherwise, it's not ensured the available ring entries pushed by guest can be observed by vhost in time, leading to stale available ring entries fetched by vhost in vhost_get_vq_desc(), as reported by Yihuang Yu on NVidia's grace-hopper (ARM64) platform. /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ -accel kvm -machine virt,gic-version=host -cpu host \ -smp maxcpus=1,cpus=1,sockets=1,clusters=1,cores=1,threads=1 \ -m 4096M,slots=16,maxmem=64G \ -object memory-backend-ram,id=mem0,size=4096M \ : \ -netdev tap,id=vnet0,vhost=true \ -device virtio-net-pci,bus=pcie.8,netdev=vnet0,mac=52:54:00:f1:26:b0 : guest# netperf -H 10.26.1.81 -l 60 -C -c -t UDP_STREAM virtio_net virtio0: output.0:id 100 is not a head! Add the missed smp_rmb() in vhost_enable_notify(). When it returns true, it means there's still pending tx buffers. Since it might read indices, so it still can bypass the smp_rmb() in vhost_get_vq_desc(). Note that it should be safe until vq->avail_idx is changed by commit d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()"). Fixes: d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()") Cc: # v5.18+ Reported-by: Yihuang Yu Suggested-by: Will Deacon Signed-off-by: Gavin Shan Acked-by: Jason Wang Message-Id: <20240328002149.1141302-3-gshan@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c3f7a77d9fed..1b00ed5ef1cf 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2572,9 +2572,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->avail->idx, r); return false; } - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - return vq->avail_idx != vq->last_avail_idx; + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); + if (vq->avail_idx != vq->last_avail_idx) { + /* Since we have updated avail_idx, the following + * call to vhost_get_vq_desc() will read available + * ring entries. Make sure that read happens after + * the avail_idx read. + */ + smp_rmb(); + return true; + } + + return false; } EXPORT_SYMBOL_GPL(vhost_enable_notify); From 0c182182d6d9e5bcc2156faf5e137001ccd0e14b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 5 Mar 2024 22:10:03 -0800 Subject: [PATCH 274/553] perf/x86: Fix out of range data commit dec8ced871e17eea46f097542dd074d022be4bd1 upstream. On x86 each struct cpu_hw_events maintains a table for counter assignment but it missed to update one for the deleted event in x86_pmu_del(). This can make perf_clear_dirty_counters() reset used counter if it's called before event scheduling or enabling. Then it would return out of range data which doesn't make sense. The following code can reproduce the problem. $ cat repro.c #include #include #include #include #include #include #include #include struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .disabled = 1, }; void *worker(void *arg) { int cpu = (long)arg; int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0); int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0); void *p; do { ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0); ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); munmap(p, 4096); ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); } while (1); return NULL; } int main(void) { int i; int n = sysconf(_SC_NPROCESSORS_ONLN); pthread_t *th = calloc(n, sizeof(*th)); for (i = 0; i < n; i++) pthread_create(&th[i], NULL, worker, (void *)(long)i); for (i = 0; i < n; i++) pthread_join(th[i], NULL); free(th); return 0; } And you can see the out of range data using perf stat like this. Probably it'd be easier to see on a large machine. $ gcc -o repro repro.c -pthread $ ./repro & $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }' 1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%) 1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%) 1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%) 2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%) 2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%) 2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%) 2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%) 3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%) 3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%) 3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%) 3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle It should move the contents in the cpuc->assign as well. Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task") Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240306061003.1894224-1-namhyung@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 30fb4931d387..1394312b732a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; From 9c09773917fbb77dff85b433e1e89123fc5fb530 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Apr 2024 10:51:05 -0700 Subject: [PATCH 275/553] x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f337a6a21e2fd67eadea471e93d05dd37baaa9be upstream. Initialize cpu_mitigations to CPU_MITIGATIONS_OFF if the kernel is built with CONFIG_SPECULATION_MITIGATIONS=n, as the help text quite clearly states that disabling SPECULATION_MITIGATIONS is supposed to turn off all mitigations by default. │ If you say N, all mitigations will be disabled. You really │ should know what you are doing to say so. As is, the kernel still defaults to CPU_MITIGATIONS_AUTO, which results in some mitigations being enabled in spite of SPECULATION_MITIGATIONS=n. Fixes: f43b9876e857 ("x86/retbleed: Add fine grained Kconfig knobs") Signed-off-by: Sean Christopherson Signed-off-by: Ingo Molnar Reviewed-by: Daniel Sneddon Cc: stable@vger.kernel.org Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240409175108.1512861-2-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index e6f0101941ed..2c44dd12a158 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2788,7 +2788,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - CPU_MITIGATIONS_AUTO; + IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { From 881b495ed26beb0b8c9dcd7aca5343c56c082c1d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Apr 2024 16:26:30 -0700 Subject: [PATCH 276/553] selftests: timers: Fix abs() warning in posix_timers test commit ed366de8ec89d4f960d66c85fc37d9de22f7bf6d upstream. Building with clang results in the following warning: posix_timers.c:69:6: warning: absolute value function 'abs' given an argument of type 'long long' but has parameter of type 'int' which may cause truncation of value [-Wabsolute-value] if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { ^ So switch to using llabs() instead. Fixes: 0bc4b0cf1570 ("selftests: add basic posix timers selftests") Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240410232637.4135564-3-jstultz@google.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/timers/posix_timers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 0ba500056e63..193a984f512c 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end) diff = end.tv_usec - start.tv_usec; diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; - if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } From 22f51ddb0cc120daf65be585e2df765fe54243ba Mon Sep 17 00:00:00 2001 From: Adam Dunlap Date: Mon, 18 Mar 2024 16:09:27 -0700 Subject: [PATCH 277/553] x86/apic: Force native_apic_mem_read() to use the MOV instruction commit 5ce344beaca688f4cdea07045e0b8f03dc537e74 upstream. When done from a virtual machine, instructions that touch APIC memory must be emulated. By convention, MMIO accesses are typically performed via io.h helpers such as readl() or writeq() to simplify instruction emulation/decoding (ex: in KVM hosts and SEV guests) [0]. Currently, native_apic_mem_read() does not follow this convention, allowing the compiler to emit instructions other than the MOV instruction generated by readl(). In particular, when the kernel is compiled with clang and run as a SEV-ES or SEV-SNP guest, the compiler would emit a TESTL instruction which is not supported by the SEV-ES emulator, causing a boot failure in that environment. It is likely the same problem would happen in a TDX guest as that uses the same instruction emulator as SEV-ES. To make sure all emulators can emulate APIC memory reads via MOV, use the readl() function in native_apic_mem_read(). It is expected that any emulator would support MOV in any addressing mode as it is the most generic and is what is usually emitted currently. The TESTL instruction is emitted when native_apic_mem_read() is inlined into apic_mem_wait_icr_idle(). The emulator comes from insn_decode_mmio() in arch/x86/lib/insn-eval.c. It's not worth it to extend insn_decode_mmio() to support more instructions since, in theory, the compiler could choose to output nearly any instruction for such reads which would bloat the emulator beyond reason. [0] https://lore.kernel.org/all/20220405232939.73860-12-kirill.shutemov@linux.intel.com/ [ bp: Massage commit message, fix typos. ] Signed-off-by: Adam Dunlap Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Reviewed-by: Ard Biesheuvel Tested-by: Kevin Loughlin Cc: Link: https://lore.kernel.org/r/20240318230927.2191933-1-acdunlap@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074ba..36ceecd40fd9 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -12,6 +12,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -109,7 +110,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } extern void native_apic_wait_icr_idle(void); From d447d8de840c26396272f648bfd9438e4bf4b2f5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2024 09:46:01 +0200 Subject: [PATCH 278/553] irqflags: Explicitly ignore lockdep_hrtimer_exit() argument commit c1d11fc2c8320871b40730991071dd0a0b405bc8 upstream. When building with 'make W=1' but CONFIG_TRACE_IRQFLAGS=n, the unused argument to lockdep_hrtimer_exit() causes a warning: kernel/time/hrtimer.c:1655:14: error: variable 'expires_in_hardirq' set but not used [-Werror=unused-but-set-variable] This is intentional behavior, so add a cast to void to shut up the warning. Fixes: 73d20564e0dc ("hrtimer: Don't dereference the hrtimer pointer after the callback") Reported-by: kernel test robot Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240408074609.3170807-1-arnd@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202311191229.55QXHVc6-lkp@intel.com/ Signed-off-by: Greg Kroah-Hartman --- include/linux/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 2b665c32f5fe..2e09c269bf9d 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -126,7 +126,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) From 0d433e40827d3a896725219f0df92f418ae28dab Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Tue, 9 Apr 2024 16:08:05 -0700 Subject: [PATCH 279/553] x86/bugs: Fix return type of spectre_bhi_state() commit 04f4230e2f86a4e961ea5466eda3db8c1762004d upstream. The definition of spectre_bhi_state() incorrectly returns a const char * const. This causes the a compiler warning when building with W=1: warning: type qualifiers ignored on function return type [-Wignored-qualifiers] 2812 | static const char * const spectre_bhi_state(void) Remove the const qualifier from the pointer. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Reported-by: Sean Christopherson Signed-off-by: Daniel Sneddon Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20240409230806.1545822-1-daniel.sneddon@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 96bd3ee83a48..e2a357890df6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2788,7 +2788,7 @@ static char *pbrsb_eibrs_state(void) } } -static const char * const spectre_bhi_state(void) +static const char *spectre_bhi_state(void) { if (!boot_cpu_has_bug(X86_BUG_BHI)) return "; BHI: Not affected"; From 662e341e57ccbd178da8c44f9a356175fc75fcbd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:45 -0700 Subject: [PATCH 280/553] x86/bugs: Fix BHI documentation commit dfe648903f42296866d79f10d03f8c85c9dfba30 upstream. Fix up some inaccuracies in the BHI documentation. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/8c84f7451bfe0dd08543c6082a383f390d4aa7e2.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 15 ++++++++------- Documentation/admin-guide/kernel-parameters.txt | 12 +++++++----- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 9edb2860a3e1..d4f260634074 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -439,11 +439,11 @@ The possible values in this file are: - System is protected by retpoline * - BHI: BHI_DIS_S - System is protected by BHI_DIS_S - * - BHI: SW loop; KVM SW loop + * - BHI: SW loop, KVM SW loop - System is protected by software clearing sequence * - BHI: Syscall hardening - Syscalls are hardened against BHI - * - BHI: Syscall hardening; KVM: SW loop + * - BHI: Syscall hardening, KVM: SW loop - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU @@ -666,13 +666,14 @@ kernel command line. of the HW BHI control and the SW BHB clearing sequence. on - unconditionally enable. + (default) Enable the HW or SW mitigation as + needed. off - unconditionally disable. + Disable the mitigation. auto - enable if hardware mitigation - control(BHI_DIS_S) is available, otherwise - enable alternate mitigation in KVM. + Enable the HW mitigation if needed, but + *don't* enable the SW mitigation except for KVM. + The system may be vulnerable. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b2c7b2f012e9..1ae6ceff0dd3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3283,6 +3283,7 @@ reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] + spectre_bhi=off [X86] spectre_v2_user=off [X86] srbds=off [X86,INTEL] ssbd=force-off [ARM64] @@ -5739,11 +5740,12 @@ deployment of the HW BHI control and the SW BHB clearing sequence. - on - unconditionally enable. - off - unconditionally disable. - auto - (default) enable hardware mitigation - (BHI_DIS_S) if available, otherwise enable - alternate mitigation in KVM. + on - (default) Enable the HW or SW mitigation + as needed. + off - Disable the mitigation. + auto - Enable the HW mitigation if needed, but + *don't* enable the SW mitigation except + for KVM. The system may be vulnerable. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. From b1b32586f797bbcde3e286c80ac3e450022e1591 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:46 -0700 Subject: [PATCH 281/553] x86/bugs: Cache the value of MSR_IA32_ARCH_CAPABILITIES commit cb2db5bb04d7f778fbc1a1ea2507aab436f1bff3 upstream. There's no need to keep reading MSR_IA32_ARCH_CAPABILITIES over and over. It's even read in the BHI sysfs function which is a big no-no. Just read it once and cache it. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/9592a18a814368e75f8f4b9d74d3883aa4fd1eaf.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e2a357890df6..dacd196b71aa 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,6 +60,8 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init ia32_cap; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; @@ -143,6 +145,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + ia32_cap = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -300,8 +304,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -340,7 +342,6 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); if ( (ia32_cap & ARCH_CAP_MDS_NO) && !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; @@ -400,8 +401,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -412,8 +411,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -507,7 +504,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -658,8 +655,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -668,7 +663,6 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; @@ -812,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(ia32_cap & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1884,8 +1878,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -2797,7 +2789,7 @@ static const char *spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + !(ia32_cap & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Syscall hardening, KVM: SW loop"; From dc2db3e978c5ae3895d28728c4e4a69d468a00cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Apr 2024 09:25:36 +0200 Subject: [PATCH 282/553] x86/bugs: Rename various 'ia32_cap' variables to 'x86_arch_cap_msr' commit d0485730d2189ffe5d986d4e9e191f1e4d5ffd24 upstream. So we are using the 'ia32_cap' value in a number of places, which got its name from MSR_IA32_ARCH_CAPABILITIES MSR register. But there's very little 'IA32' about it - this isn't 32-bit only code, nor does it originate from there, it's just a historic quirk that many Intel MSR names are prefixed with IA32_. This is already clear from the helper method around the MSR: x86_read_arch_cap_msr(), which doesn't have the IA32 prefix. So rename 'ia32_cap' to 'x86_arch_cap_msr' to be consistent with its role and with the naming of the helper function. Signed-off-by: Ingo Molnar Cc: Josh Poimboeuf Cc: Nikolay Borisov Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/9592a18a814368e75f8f4b9d74d3883aa4fd1eaf.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 6 ++--- arch/x86/kernel/cpu/bugs.c | 30 +++++++++++----------- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++------------------ 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 770557110051..e1672cc77c65 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1760,11 +1760,11 @@ static int x2apic_state; static bool x2apic_hw_locked(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; u64 msr; - ia32_cap = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); return (msr & LEGACY_XAPIC_DISABLED); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dacd196b71aa..fb2ab86d979a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); -static u64 __ro_after_init ia32_cap; +static u64 __ro_after_init x86_arch_cap_msr; static DEFINE_MUTEX(spec_ctrl_mutex); @@ -145,7 +145,7 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } - ia32_cap = x86_read_arch_cap_msr(); + x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); @@ -342,8 +342,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -433,7 +433,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -443,10 +443,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -504,7 +504,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -663,7 +663,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -806,7 +806,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(ia32_cap & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1518,14 +1518,14 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) return; - ia32_cap = x86_read_arch_cap_msr(); + x86_arch_cap_msr = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_RRSBA) { + if (x86_arch_cap_msr & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; update_spec_ctrl(x86_spec_ctrl_base); } @@ -1892,7 +1892,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2789,7 +2789,7 @@ static const char *spectre_bhi_state(void) else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(ia32_cap & ARCH_CAP_RRSBA)) + !(x86_arch_cap_msr & ARCH_CAP_RRSBA)) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Syscall hardening, KVM: SW loop"; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 08fe77d2a3f9..f2bc651c0dcd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1308,25 +1308,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1334,7 +1334,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1343,11 +1343,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1359,7 +1359,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1367,15 +1367,15 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature * flag and protect from vendor-specific bugs via the whitelist. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1394,9 +1394,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1422,7 +1422,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1430,7 +1430,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1443,7 +1443,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); @@ -1452,11 +1452,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(ia32_cap & ARCH_CAP_BHI_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && !cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) @@ -1466,7 +1466,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); From 4b0b5d621e89e15e1fce6fcd7ef1b041cb94e405 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:47 -0700 Subject: [PATCH 283/553] x86/bugs: Fix BHI handling of RRSBA commit 1cea8a280dfd1016148a3820676f2f03e3f5b898 upstream. The ARCH_CAP_RRSBA check isn't correct: RRSBA may have already been disabled by the Spectre v2 mitigation (or can otherwise be disabled by the BHI mitigation itself if needed). In that case retpolines are fine. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/6f56f13da34a0834b69163467449be7f58f253dc.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fb2ab86d979a..7e7b4cd5304c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1515,20 +1515,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 x86_arch_cap_msr; + if (rrsba_disabled) + return; + + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; + return; + } if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) return; - x86_arch_cap_msr = x86_read_arch_cap_msr(); - - if (x86_arch_cap_msr & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1629,9 +1634,11 @@ static void __init bhi_select_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) - return; + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } if (spec_ctrl_bhi_dis()) return; @@ -2788,8 +2795,7 @@ static const char *spectre_bhi_state(void) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; - else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !(x86_arch_cap_msr & ARCH_CAP_RRSBA)) + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Syscall hardening, KVM: SW loop"; From d737d8cd8e64e34c429e1ac5d3502159aeed3446 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:48 -0700 Subject: [PATCH 284/553] x86/bugs: Clarify that syscall hardening isn't a BHI mitigation commit 5f882f3b0a8bf0788d5a0ee44b1191de5319bb8a upstream. While syscall hardening helps prevent some BHI attacks, there's still other low-hanging fruit remaining. Don't classify it as a mitigation and make it clear that the system may still be vulnerable if it doesn't have a HW or SW mitigation enabled. Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Sean Christopherson Link: https://lore.kernel.org/r/b5951dae3fdee7f1520d5136a27be3bdfe95f88b.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 11 +++++------ Documentation/admin-guide/kernel-parameters.txt | 3 +-- arch/x86/kernel/cpu/bugs.c | 6 +++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index d4f260634074..081f28900898 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -441,10 +441,10 @@ The possible values in this file are: - System is protected by BHI_DIS_S * - BHI: SW loop, KVM SW loop - System is protected by software clearing sequence - * - BHI: Syscall hardening - - Syscalls are hardened against BHI - * - BHI: Syscall hardening, KVM: SW loop - - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence + * - BHI: Vulnerable + - System is vulnerable to BHI + * - BHI: Vulnerable, KVM: SW loop + - System is vulnerable; KVM is protected by software clearing sequence Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will @@ -661,8 +661,7 @@ kernel command line. spectre_bhi= [X86] Control mitigation of Branch History Injection - (BHI) vulnerability. Syscalls are hardened against BHI - regardless of this setting. This setting affects the deployment + (BHI) vulnerability. This setting affects the deployment of the HW BHI control and the SW BHB clearing sequence. on diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1ae6ceff0dd3..e93ecb388656 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5735,8 +5735,7 @@ See Documentation/admin-guide/laptops/sonypi.rst spectre_bhi= [X86] Control mitigation of Branch History Injection - (BHI) vulnerability. Syscalls are hardened against BHI - reglardless of this setting. This setting affects the + (BHI) vulnerability. This setting affects the deployment of the HW BHI control and the SW BHB clearing sequence. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7e7b4cd5304c..d3e1f0614f09 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2797,10 +2797,10 @@ static const char *spectre_bhi_state(void) return "; BHI: SW loop, KVM: SW loop"; else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) return "; BHI: Retpoline"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) - return "; BHI: Syscall hardening, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; - return "; BHI: Vulnerable (Syscall hardening enabled)"; + return "; BHI: Vulnerable"; } static ssize_t spectre_v2_show_state(char *buf) From 7f18a0df76217683500979dcb0a2faa7f3eccfa5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:50 -0700 Subject: [PATCH 285/553] x86/bugs: Remove CONFIG_BHI_MITIGATION_AUTO and spectre_bhi=auto commit 36d4fe147c870f6d3f6602befd7ef44393a1c87a upstream. Unlike most other mitigations' "auto" options, spectre_bhi=auto only mitigates newer systems, which is confusing and not particularly useful. Remove it. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Cc: Sean Christopherson Cc: Linus Torvalds Link: https://lore.kernel.org/r/412e9dc87971b622bbbaf64740ebc1f140bff343.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 4 ---- Documentation/admin-guide/kernel-parameters.txt | 3 --- arch/x86/Kconfig | 4 ---- arch/x86/kernel/cpu/bugs.c | 10 +--------- 4 files changed, 1 insertion(+), 20 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 081f28900898..e0a1be97fa75 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -669,10 +669,6 @@ kernel command line. needed. off Disable the mitigation. - auto - Enable the HW mitigation if needed, but - *don't* enable the SW mitigation except for KVM. - The system may be vulnerable. For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e93ecb388656..aebbe2981241 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5742,9 +5742,6 @@ on - (default) Enable the HW or SW mitigation as needed. off - Disable the mitigation. - auto - Enable the HW mitigation if needed, but - *don't* enable the SW mitigation except - for KVM. The system may be vulnerable. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ba815ac474a1..be5b3a48c8cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2581,10 +2581,6 @@ config SPECTRE_BHI_OFF bool "off" help Equivalent to setting spectre_bhi=off command line parameter. -config SPECTRE_BHI_AUTO - bool "auto" - help - Equivalent to setting spectre_bhi=auto command line parameter. endchoice diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3e1f0614f09..0b0fea179b0d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1602,13 +1602,10 @@ static bool __init spec_ctrl_bhi_dis(void) enum bhi_mitigations { BHI_MITIGATION_OFF, BHI_MITIGATION_ON, - BHI_MITIGATION_AUTO, }; static enum bhi_mitigations bhi_mitigation __ro_after_init = - IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : - IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : - BHI_MITIGATION_AUTO; + IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; static int __init spectre_bhi_parse_cmdline(char *str) { @@ -1619,8 +1616,6 @@ static int __init spectre_bhi_parse_cmdline(char *str) bhi_mitigation = BHI_MITIGATION_OFF; else if (!strcmp(str, "on")) bhi_mitigation = BHI_MITIGATION_ON; - else if (!strcmp(str, "auto")) - bhi_mitigation = BHI_MITIGATION_AUTO; else pr_err("Ignoring unknown spectre_bhi option (%s)", str); @@ -1650,9 +1645,6 @@ static void __init bhi_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); - if (bhi_mitigation == BHI_MITIGATION_AUTO) - return; - /* Mitigate syscalls when the mitigation is forced =on */ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); From d844df110084ef8bd950a52194865f3f63b561ca Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 10 Apr 2024 22:40:51 -0700 Subject: [PATCH 286/553] x86/bugs: Replace CONFIG_SPECTRE_BHI_{ON,OFF} with CONFIG_MITIGATION_SPECTRE_BHI commit 4f511739c54b549061993b53fc0380f48dfca23b upstream. For consistency with the other CONFIG_MITIGATION_* options, replace the CONFIG_SPECTRE_BHI_{ON,OFF} options with a single CONFIG_MITIGATION_SPECTRE_BHI option. [ mingo: Fix ] Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Sean Christopherson Cc: Linus Torvalds Cc: Nikolay Borisov Link: https://lore.kernel.org/r/3833812ea63e7fdbe36bf8b932e63f70d18e2a2a.1712813475.git.jpoimboe@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 17 +++-------------- arch/x86/kernel/cpu/bugs.c | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be5b3a48c8cd..5f7a86f240db 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2563,27 +2563,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also -choice - prompt "Clear branch history" +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" depends on CPU_SUP_INTEL - default SPECTRE_BHI_ON + default y help Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks where the branch history buffer is poisoned to speculatively steer indirect branches. See -config SPECTRE_BHI_ON - bool "on" - help - Equivalent to setting spectre_bhi=on command line parameter. -config SPECTRE_BHI_OFF - bool "off" - help - Equivalent to setting spectre_bhi=off command line parameter. - -endchoice - endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0b0fea179b0d..6d69123de366 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1605,7 +1605,7 @@ enum bhi_mitigations { }; static enum bhi_mitigations bhi_mitigation __ro_after_init = - IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; static int __init spectre_bhi_parse_cmdline(char *str) { From 2bc1796f8eeba648c06d68ece1ea86974363f6d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 Apr 2024 18:50:03 +0300 Subject: [PATCH 287/553] drm/i915/cdclk: Fix CDCLK programming order when pipes are active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7b1f6b5aaec0f849e19c3e99d4eea75876853cdd upstream. Currently we always reprogram CDCLK from the intel_set_cdclk_pre_plane_update() when using squash/crawl. The code only works correctly for the cd2x update or full modeset cases, and it was simply never updated to deal with squash/crawl. If the CDCLK frequency is increasing we must reprogram it before we do anything else that might depend on the new higher frequency, and conversely we must not decrease the frequency until everything that might still depend on the old higher frequency has been dealt with. Since cdclk_state->pipe is only relevant when doing a cd2x update we can't use it to determine the correct sequence during squash/crawl. To that end introduce cdclk_state->disable_pipes which simply indicates that we must perform the update while the pipes are disable (ie. during intel_set_cdclk_pre_plane_update()). Otherwise we use the same old vs. new CDCLK frequency comparsiong as for cd2x updates. The only remaining problem case is when the voltage_level needs to increase due to a DDI port, but the CDCLK frequency is decreasing (and not all pipes are being disabled). The current approach will not bump the voltage level up until after the port has already been enabled, which is too late. But we'll take care of that case separately. v2: Don't break the "must disable pipes case" v3: Keep the on stack 'pipe' for future use Cc: stable@vger.kernel.org Fixes: d62686ba3b54 ("drm/i915/adl_p: CDCLK crawl support for ADL") Reviewed-by: Uma Shankar Reviewed-by: Gustavo Sousa Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240402155016.13733-2-ville.syrjala@linux.intel.com (cherry picked from commit 3aecee90ac12a351905f12dda7643d5b0676d6ca) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_cdclk.c | 7 +++++-- drivers/gpu/drm/i915/display/intel_cdclk.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 25dcdde5feb6..5147718f38d6 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2152,7 +2152,7 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe == INVALID_PIPE || + if (new_cdclk_state->disable_pipes || old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2181,7 +2181,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) &new_cdclk_state->actual)) return; - if (pipe != INVALID_PIPE && + if (!new_cdclk_state->disable_pipes && old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) { drm_WARN_ON(&dev_priv->drm, !new_cdclk_state->base.changed); @@ -2634,6 +2634,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa return NULL; cdclk_state->pipe = INVALID_PIPE; + cdclk_state->disable_pipes = false; return &cdclk_state->base; } @@ -2793,6 +2794,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; + new_cdclk_state->disable_pipes = true; + drm_dbg_kms(&dev_priv->drm, "Modeset required for cdclk change\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index c674879a84a5..c4b3e5938bb3 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -51,6 +51,9 @@ struct intel_cdclk_state { /* bitmask of active pipes */ u8 active_pipes; + + /* update cdclk with pipes disabled */ + bool disable_pipes; }; int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); From 29bd4d05f2c5b9e84935874d1c0c2c97a8d91a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Apr 2024 00:34:27 +0300 Subject: [PATCH 288/553] drm/i915: Disable port sync when bigjoiner is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0653d501409eeb9f1deb7e4c12e4d0d2c9f1cba1 upstream. The current modeset sequence can't handle port sync and bigjoiner at the same time. Refuse port sync when bigjoiner is needed, at least until we fix the modeset sequence. v2: Add a FIXME (Vandite) Cc: stable@vger.kernel.org Tested-by: Vidya Srinivas Reviewed-by: Vandita Kulkarni Link: https://patchwork.freedesktop.org/patch/msgid/20240404213441.17637-4-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit b37e1347b991459c38c56ec2476087854a4f720b) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_ddi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 706e2d956801..76277eb3eb25 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3683,7 +3683,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1, static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1, const struct intel_crtc_state *crtc_state2) { + /* + * FIXME the modeset sequence is currently wrong and + * can't deal with bigjoiner + port sync at the same time. + */ return crtc_state1->hw.active && crtc_state2->hw.active && + !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes && crtc_state1->output_types == crtc_state2->output_types && crtc_state1->output_format == crtc_state2->output_format && crtc_state1->lane_count == crtc_state2->lane_count && From 7cc89dbcb8eabd01ae264dc76a5939ea96c63c90 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 14 Feb 2024 17:55:54 +0530 Subject: [PATCH 289/553] drm/amdgpu: Reset dGPU if suspend got aborted commit 8b2be55f4d6c1099d7f629b0ed7535a5be788c83 upstream. For SOC21 ASICs, there is an issue in re-enabling PM features if a suspend got aborted. In such cases, reset the device during resume phase. This is a workaround till a proper solution is finalized. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc21.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 56af7b5abac1..da11f78826ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -780,10 +780,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } From 90819b1830bc26dfacedae28fce72e1f21133b38 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 23 Mar 2024 20:46:53 -0400 Subject: [PATCH 290/553] drm/amdgpu: always force full reset for SOC21 commit 65ff8092e4802f96d87d3d7cde146961f5228265 upstream. There are cases where soft reset seems to succeed, but does not, so always use mode1/2 for now. Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index da11f78826ea..56cc59629d96 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -460,10 +460,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } From 724fbc7c0cb88729ec914a3d778f466e5ea3dbb7 Mon Sep 17 00:00:00 2001 From: Fudongwang Date: Tue, 26 Mar 2024 16:03:16 +0800 Subject: [PATCH 291/553] drm/amd/display: fix disable otg wa logic in DCN316 commit cf79814cb0bf5749b9f0db53ca231aa540c02768 upstream. [Why] Wrong logic cause screen corruption. [How] Port logic from DCN35/314. Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Fudongwang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 187f5b27fdc8..29d2003fb712 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -112,20 +112,25 @@ static int dcn316_get_active_display_cnt_wa( return display_count; } -static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || - dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc)) { if (disable) { - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); + reset_sync_context_for_pipe(dc, context, i); } else pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); @@ -222,11 +227,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn316_disable_otg_wa(clk_mgr_base, context, true); + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn316_disable_otg_wa(clk_mgr_base, context, false); + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } From 6741e066ec7633450d3186946035c1f80c4226b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Apr 2024 11:18:29 +0200 Subject: [PATCH 292/553] Linux 6.1.87 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240415141946.165870434@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Pavel Machek (CIP) Tested-by: Kelsey Steele Tested-by: Mark Brown Tested-by: Ron Economos Tested-by: Yann Sionneau Tested-by: Jon Hunter Tested-by: Mateusz Jończyk Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index baddd8ed8186..e46a57006a34 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 86 +SUBLEVEL = 87 EXTRAVERSION = NAME = Curry Ramen From 1e053399681ccc87f8dc73af4ffe09b66d9fef95 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 7 Apr 2024 22:28:02 -0400 Subject: [PATCH 293/553] drm/vmwgfx: Enable DMA mappings with SEV [ Upstream commit 4c08f01934ab67d1d283d5cbaa52b923abcfe4cd ] Enable DMA mappings in vmwgfx after TTM has been fixed in commit 3bf3710e3718 ("drm/ttm: Add a generic TTM memcpy move for page-based iomem") This enables full guest-backed memory support and in particular allows usage of screen targets as the presentation mechanism. Signed-off-by: Zack Rusin Reported-by: Ye Li Tested-by: Ye Li Fixes: 3b0d6458c705 ("drm/vmwgfx: Refuse DMA operation when SEV encryption is active") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v6.6+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240408022802.358641-1-zack.rusin@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9d7a1b710f48..53f63ad656a4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -663,11 +663,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - /* TTM currently doesn't fully support SEV encryption. */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -EINVAL; - - if (vmw_force_coherent) + /* + * When running with SEV we always want dma mappings, because + * otherwise ttm tt pool pages will bounce through swiotlb running + * out of available space. + */ + if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT)) dev_priv->map_mode = vmw_dma_alloc_coherent; else if (vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; From 87f8aac740f17ee140ff90dfb592954596492aec Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Feb 2023 09:06:53 +0800 Subject: [PATCH 294/553] drm/amdgpu: fix incorrect active rb bitmap for gfx11 [ Upstream commit f9c35f4fffc6cb5bbb23f546f48c045aef012518 ] GFX v11 changes RB_BACKEND_DISABLE related registers from per SA to global ones. The approach to query active rb bitmap needs to be changed accordingly. Query per SE setting returns wrong active RB bitmap especially in the case when some of SA are disabled. With the new approach, driver will generate the active rb bitmap based on active SA bitmap and global active RB bitmap. Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher Stable-dep-of: bbca7f414ae9 ("drm/amdgpu: fix incorrect number of active RBs for gfx11") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 80 +++++++++++++++++--------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 66a6f7a37ebc..ec40f88da00c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1531,44 +1531,70 @@ static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); } +static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) +{ + u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; + + gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); + gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, + CC_GC_SA_UNIT_DISABLE, + SA_DISABLE); + gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); + gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, + GC_USER_SA_UNIT_DISABLE, + SA_DISABLE); + sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines); + + return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); +} + static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) { - u32 data, mask; + u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; + u32 rb_mask; - data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); - data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); + gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, + CC_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, + GC_USER_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines); - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - - mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); - - return (~data) & mask; + return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); } static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) { - int i, j; - u32 data; - u32 active_rbs = 0; - u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se; + u32 rb_bitmap_width_per_sa; + u32 max_sa; + u32 active_sa_bitmap; + u32 global_active_rb_bitmap; + u32 active_rb_bitmap = 0; + u32 i; - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); - data = gfx_v11_0_get_rb_active_bitmap(adev); - active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * - rb_bitmap_width_per_sh); - } + /* query sa bitmap from SA_UNIT_DISABLE registers */ + active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); + /* query rb bitmap from RB_BACKEND_DISABLE registers */ + global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); + + /* generate active rb bitmap according to active sa bitmap */ + max_sa = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se; + rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + for (i = 0; i < max_sa; i++) { + if (active_sa_bitmap & (1 << i)) + active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - adev->gfx.config.backend_enable_mask = active_rbs; - adev->gfx.config.num_rbs = hweight32(active_rbs); + active_rb_bitmap |= global_active_rb_bitmap; + adev->gfx.config.backend_enable_mask = active_rb_bitmap; + adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } #define DEFAULT_SH_MEM_BASES (0x6000) From 01c227f5a72b08f7da4fa06c67a60555d21c17f9 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 3 Apr 2024 17:28:44 +0800 Subject: [PATCH 295/553] drm/amdgpu: fix incorrect number of active RBs for gfx11 [ Upstream commit bbca7f414ae9a12ea231cdbafd79c607e3337ea8 ] The RB bitmap should be global active RB bitmap & active RB bitmap based on active SA. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ec40f88da00c..5a5787bfbce7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1592,7 +1592,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } From 121a83be215ba5e53c4dbe5ce696927f91eefee1 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 21 Mar 2024 13:49:43 -0400 Subject: [PATCH 296/553] drm/amd/display: Do not recursively call manual trigger programming [ Upstream commit 953927587f37b731abdeabe46ad44a3b3ec67a52 ] [WHY&HOW] We should not be recursively calling the manual trigger programming function when FAMS is not in use. Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Acked-by: Hamza Mahfooz Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index a974f86e718a..37c645a882dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -216,9 +216,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc) OTG_V_TOTAL_MAX_SEL, 1, OTG_FORCE_LOCK_ON_EVENT, 0, OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); } } From cbe7b911e0c9139347b6ec5ba088283a699cb95f Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 5 Apr 2024 15:55:51 +0300 Subject: [PATCH 297/553] io_uring: Fix io_cqring_wait() not restoring sigmask on get_timespec64() failure Commit 978e5c19dfefc271e5550efba92fcef0d3f62864 upstream. This bug was introduced in commit 950e79dd7313 ("io_uring: minor io_cqring_wait() optimization"), which was made in preparation for adc8682ec690 ("io_uring: Add support for napi_busy_poll"). The latter got reverted in cb3182167325 ("Revert "io_uring: Add support for napi_busy_poll""), so simply undo the former as well. Cc: stable@vger.kernel.org Fixes: 950e79dd7313 ("io_uring: minor io_cqring_wait() optimization") Signed-off-by: Alexey Izbyshev Link: https://lore.kernel.org/r/20240405125551.237142-1-izbyshev@ispras.ru Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 68f1b6f8699a..958c3b619020 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2426,6 +2426,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return 0; } while (ret > 0); + if (uts) { + struct timespec64 ts; + + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); + } + if (sig) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) @@ -2439,14 +2447,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - if (uts) { - struct timespec64 ts; - - if (get_timespec64(&ts, uts)) - return -EFAULT; - timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); - } - init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); From 45eec81eaca624f9278f8be9e62f8ae1b78e46a0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 10 Apr 2024 12:38:13 -0400 Subject: [PATCH 298/553] SUNRPC: Fix rpcgss_context trace event acceptor field commit a4833e3abae132d613ce7da0e0c9a9465d1681fa upstream. The rpcgss_context trace event acceptor field is a dynamically sized string that records the "data" parameter. But this parameter is also dependent on the "len" field to determine the size of the data. It needs to use __string_len() helper macro where the length can be passed in. It also incorrectly uses strncpy() to save it instead of __assign_str(). As these macros can change, it is not wise to open code them in trace events. As of commit c759e609030c ("tracing: Remove __assign_str_len()"), __assign_str() can be used for both __string() and __string_len() fields. Before that commit, __assign_str_len() is required to be used. This needs to be noted for backporting. (In actuality, commit c1fa617caeb0 ("tracing: Rework __assign_str() and __string() to not duplicate getting the string") is the commit that makes __string_str_len() obsolete). Cc: stable@vger.kernel.org Fixes: 0c77668ddb4e ("SUNRPC: Introduce trace points in rpc_auth_gss.ko") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rpcgss.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 3f121eed369e..894d9fc8bd94 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -587,7 +587,7 @@ TRACE_EVENT(rpcgss_context, __field(unsigned int, timeout) __field(u32, window_size) __field(int, len) - __string(acceptor, data) + __string_len(acceptor, data, len) ), TP_fast_assign( @@ -596,7 +596,7 @@ TRACE_EVENT(rpcgss_context, __entry->timeout = timeout; __entry->window_size = window_size; __entry->len = len; - strncpy(__get_str(acceptor), data, len); + __assign_str(acceptor, data); ), TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s", From bcdd9ce78d66c601d60dd23888af4f4ee9d743bd Mon Sep 17 00:00:00 2001 From: Yuanhe Shu Date: Mon, 26 Feb 2024 11:18:16 +0800 Subject: [PATCH 299/553] selftests/ftrace: Limit length in subsystem-enable tests commit 1a4ea83a6e67f1415a1f17c1af5e9c814c882bb5 upstream. While sched* events being traced and sched* events continuously happen, "[xx] event tracing - enable/disable with subsystem level files" would not stop as on some slower systems it seems to take forever. Select the first 100 lines of output would be enough to judge whether there are more than 3 types of sched events. Fixes: 815b18ea66d6 ("ftracetest: Add basic event tracing test cases") Cc: stable@vger.kernel.org Signed-off-by: Yuanhe Shu Acked-by: Masami Hiramatsu (Google) Acked-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- .../selftests/ftrace/test.d/event/subsystem-enable.tc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b1ede6249866..b7c8f29c09a9 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -18,7 +18,7 @@ echo 'sched:*' > set_event yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +29,7 @@ echo 1 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -40,7 +40,7 @@ echo 0 > events/sched/enable yield -count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then fail "any of scheduler events should not be recorded" fi From 07b37f227c8daa27e68f57b1c691fab34a06731e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 17 Apr 2024 13:38:29 +0200 Subject: [PATCH 300/553] random: handle creditable entropy from atomic process context commit e871abcda3b67d0820b4182ebe93435624e9c6a4 upstream. The entropy accounting changes a static key when the RNG has initialized, since it only ever initializes once. Static key changes, however, cannot be made from atomic context, so depending on where the last creditable entropy comes from, the static key change might need to be deferred to a worker. Previously the code used the execute_in_process_context() helper function, which accounts for whether or not the caller is in_interrupt(). However, that doesn't account for the case where the caller is actually in process context but is holding a spinlock. This turned out to be the case with input_handle_event() in drivers/input/input.c contributing entropy: [] die+0xa8/0x2fc [] bug_handler+0x44/0xec [] brk_handler+0x90/0x144 [] do_debug_exception+0xa0/0x148 [] el1_dbg+0x60/0x7c [] el1h_64_sync_handler+0x38/0x90 [] el1h_64_sync+0x64/0x6c [] __might_resched+0x1fc/0x2e8 [] __might_sleep+0x44/0x7c [] cpus_read_lock+0x1c/0xec [] static_key_enable+0x14/0x38 [] crng_set_ready+0x14/0x28 [] execute_in_process_context+0xb8/0xf8 [] _credit_init_bits+0x118/0x1dc [] add_timer_randomness+0x264/0x270 [] add_input_randomness+0x38/0x48 [] input_handle_event+0x2b8/0x490 [] input_event+0x6c/0x98 According to Guoyong, it's not really possible to refactor the various drivers to never hold a spinlock there. And in_atomic() isn't reliable. So, rather than trying to be too fancy, just punt the change in the static key to a workqueue always. There's basically no drawback of doing this, as the code already needed to account for the static key not changing immediately, and given that it's just an optimization, there's not exactly a hurry to change the static key right away, so deferal is fine. Reported-by: Guoyong Wang Cc: stable@vger.kernel.org Fixes: f5bda35fba61 ("random: use static branch for crng_ready()") Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5d1c8e1c99b5..fd57eb372d49 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -683,7 +683,7 @@ static void extract_entropy(void *buf, size_t len) static void __cold _credit_init_bits(size_t bits) { - static struct execute_work set_ready; + static DECLARE_WORK(set_ready, crng_set_ready); unsigned int new, orig, add; unsigned long flags; @@ -699,8 +699,8 @@ static void __cold _credit_init_bits(size_t bits) if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ - if (static_key_initialized) - execute_in_process_context(crng_set_ready, &set_ready); + if (static_key_initialized && system_unbound_wq) + queue_work(system_unbound_wq, &set_ready); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); @@ -870,8 +870,8 @@ void __init random_init(void) /* * If we were initialized by the cpu or bootloader before jump labels - * are initialized, then we should enable the static branch here, where - * it's guaranteed that jump labels have been initialized. + * or workqueues are initialized, then we should enable the static + * branch here, where it's guaranteed that these have been initialized. */ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) crng_set_ready(NULL); From add0ff34863e973fb3a968e3620bb302a5990f4b Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Wed, 17 Apr 2024 10:55:13 +0200 Subject: [PATCH 301/553] net: usb: ax88179_178a: avoid writing the mac address before first reading commit 56f78615bcb1c3ba58a5d9911bad3d9185cf141b upstream. After the commit d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets"), reset operation, in which the default mac address from the device is read, is not executed from bind operation and the random address, that is pregenerated just in case, is direclty written the first time in the device, so the default one from the device is not even read. This writing is not dangerous because is volatile and the default mac address is not missed. In order to avoid this and keep the simplification to have only one reset and reduce the delays, restore the reset from bind operation and remove the reset that is commanded from open operation. The behavior is the same but everything is ready for usbnet_probe. Tested with ASIX AX88179 USB Gigabit Ethernet devices. Restore the old behavior for the rest of possible devices because I don't have the hardware to test. cc: stable@vger.kernel.org # 6.6+ Fixes: d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets") Reported-by: Jarkko Palviainen Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20240417085524.219532-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e0e9b4c53cb0..3078511f7608 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1317,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); + ax88179_reset(dev); + return 0; } @@ -1695,7 +1697,6 @@ static const struct driver_info ax88179_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1708,7 +1709,6 @@ static const struct driver_info ax88178a_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, From 704edc9252f4988ae1ad7dafa23d0db8d90d7190 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 5 Mar 2024 15:35:06 +0100 Subject: [PATCH 302/553] drm/i915/vma: Fix UAF on destroy against retire race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0e45882ca829b26b915162e8e86dbb1095768e9e upstream. Object debugging tools were sporadically reporting illegal attempts to free a still active i915 VMA object when parking a GT believed to be idle. [161.359441] ODEBUG: free active (active state 0) object: ffff88811643b958 object type: i915_active hint: __i915_vma_active+0x0/0x50 [i915] [161.360082] WARNING: CPU: 5 PID: 276 at lib/debugobjects.c:514 debug_print_object+0x80/0xb0 ... [161.360304] CPU: 5 PID: 276 Comm: kworker/5:2 Not tainted 6.5.0-rc1-CI_DRM_13375-g003f860e5577+ #1 [161.360314] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 [161.360322] Workqueue: i915-unordered __intel_wakeref_put_work [i915] [161.360592] RIP: 0010:debug_print_object+0x80/0xb0 ... [161.361347] debug_object_free+0xeb/0x110 [161.361362] i915_active_fini+0x14/0x130 [i915] [161.361866] release_references+0xfe/0x1f0 [i915] [161.362543] i915_vma_parked+0x1db/0x380 [i915] [161.363129] __gt_park+0x121/0x230 [i915] [161.363515] ____intel_wakeref_put_last+0x1f/0x70 [i915] That has been tracked down to be happening when another thread is deactivating the VMA inside __active_retire() helper, after the VMA's active counter has been already decremented to 0, but before deactivation of the VMA's object is reported to the object debugging tool. We could prevent from that race by serializing i915_active_fini() with __active_retire() via ref->tree_lock, but that wouldn't stop the VMA from being used, e.g. from __i915_vma_retire() called at the end of __active_retire(), after that VMA has been already freed by a concurrent i915_vma_destroy() on return from the i915_active_fini(). Then, we should rather fix the issue at the VMA level, not in i915_active. Since __i915_vma_parked() is called from __gt_park() on last put of the GT's wakeref, the issue could be addressed by holding the GT wakeref long enough for __active_retire() to complete before that wakeref is released and the GT parked. I believe the issue was introduced by commit d93939730347 ("drm/i915: Remove the vma refcount") which moved a call to i915_active_fini() from a dropped i915_vma_release(), called on last put of the removed VMA kref, to i915_vma_parked() processing path called on last put of a GT wakeref. However, its visibility to the object debugging tool was suppressed by a bug in i915_active that was fixed two weeks later with commit e92eb246feb9 ("drm/i915/active: Fix missing debug object activation"). A VMA associated with a request doesn't acquire a GT wakeref by itself. Instead, it depends on a wakeref held directly by the request's active intel_context for a GT associated with its VM, and indirectly on that intel_context's engine wakeref if the engine belongs to the same GT as the VMA's VM. Those wakerefs are released asynchronously to VMA deactivation. Fix the issue by getting a wakeref for the VMA's GT when activating it, and putting that wakeref only after the VMA is deactivated. However, exclude global GTT from that processing path, otherwise the GPU never goes idle. Since __i915_vma_retire() may be called from atomic contexts, use async variant of wakeref put. Also, to avoid circular locking dependency, take care of acquiring the wakeref before VM mutex when both are needed. v7: Add inline comments with justifications for: - using untracked variants of intel_gt_pm_get/put() (Nirmoy), - using async variant of _put(), - not getting the wakeref in case of a global GTT, - always getting the first wakeref outside vm->mutex. v6: Since __i915_vma_active/retire() callbacks are not serialized, storing a wakeref tracking handle inside struct i915_vma is not safe, and there is no other good place for that. Use untracked variants of intel_gt_pm_get/put_async(). v5: Replace "tile" with "GT" across commit description (Rodrigo), - avoid mentioning multi-GT case in commit description (Rodrigo), - explain why we need to take a temporary wakeref unconditionally inside i915_vma_pin_ww() (Rodrigo). v4: Refresh on top of commit 5e4e06e4087e ("drm/i915: Track gt pm wakerefs") (Andi), - for more easy backporting, split out removal of former insufficient workarounds and move them to separate patches (Nirmoy). - clean up commit message and description a bit. v3: Identify root cause more precisely, and a commit to blame, - identify and drop former workarounds, - update commit message and description. v2: Get the wakeref before VM mutex to avoid circular locking dependency, - drop questionable Fixes: tag. Fixes: d93939730347 ("drm/i915: Remove the vma refcount") Closes: https://gitlab.freedesktop.org/drm/intel/issues/8875 Signed-off-by: Janusz Krzysztofik Cc: Thomas Hellström Cc: Nirmoy Das Cc: Andi Shyti Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Nirmoy Das Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240305143747.335367-6-janusz.krzysztofik@linux.intel.com (cherry picked from commit f3c71b2ded5c4367144a810ef25f998fd1d6c381) Signed-off-by: Rodrigo Vivi Signed-off-by: Janusz Krzysztofik Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_vma.c | 42 +++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index c8ad8f37e5cf..58a03da16a10 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -32,6 +32,7 @@ #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" #include "i915_drv.h" @@ -98,12 +99,34 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref) static int __i915_vma_active(struct i915_active *ref) { - return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_tryget(vma)) + return -ENOENT; + + /* + * Exclude global GTT VMA from holding a GT wakeref + * while active, otherwise GPU never goes idle. + */ + if (!i915_vma_is_ggtt(vma)) + intel_gt_pm_get(vma->vm->gt); + + return 0; } static void __i915_vma_retire(struct i915_active *ref) { - i915_vma_put(active_to_vma(ref)); + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we can be called from atomic contexts, + * use an async variant of intel_gt_pm_put(). + */ + intel_gt_pm_put_async(vma->vm->gt); + } + + i915_vma_put(vma); } static struct i915_vma * @@ -1365,7 +1388,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; struct i915_vma_resource *vma_res = NULL; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref; unsigned int bound; int err; @@ -1385,8 +1408,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* + * In case of a global GTT, we must hold a runtime-pm wakeref + * while global PTEs are updated. In other cases, we hold + * the rpm reference while the VMA is active. Since runtime + * resume may require allocations, which are forbidden inside + * vm->mutex, get the first rpm wakeref outside of the mutex. + */ + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { /* lock VM */ @@ -1522,8 +1551,7 @@ err_fence: if (work) dma_fence_work_commit_imm(&work->base); err_rpm: - if (wakeref) - intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); if (moving) dma_fence_put(moving); From f5603f9e13ad2fdacdfea57d303de3b0cdb3cb6f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:07 +0200 Subject: [PATCH 303/553] x86/efi: Drop EFI stub .bss from .data section [ Commit 5f51c5d0e905608ba7be126737f7c84a793ae1aa upstream ] Now that the EFI stub always zero inits its BSS section upon entry, there is no longer a need to place the BSS symbols carried by the stub into the .data section. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-18-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/vmlinux.lds.S | 1 - drivers/firmware/efi/libstub/Makefile | 7 ------- 2 files changed, 8 deletions(-) diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 112b2375d021..32892e81bf61 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -46,7 +46,6 @@ SECTIONS _data = . ; *(.data) *(.data.*) - *(.bss.efistub) _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 473ef18421db..748781c25787 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -102,13 +102,6 @@ lib-y := $(patsubst %.o,%.stub.o,$(lib-y)) # https://bugs.llvm.org/show_bug.cgi?id=46480 STUBCOPY_FLAGS-y += --remove-section=.note.gnu.property -# -# For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the -# .bss section, so the .bss section of the EFI stub needs to be included in the -# .data section of the compressed kernel to ensure initialization. Rename the -# .bss section here so it's easy to pick out in the linker script. -# -STUBCOPY_FLAGS-$(CONFIG_X86) += --rename-section .bss=.bss.efistub,load,alloc STUBCOPY_RELOC-$(CONFIG_X86_32) := R_386_32 STUBCOPY_RELOC-$(CONFIG_X86_64) := R_X86_64_64 From f46e0e9fbef59927f0926a427bd841ede177837a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:08 +0200 Subject: [PATCH 304/553] x86/efi: Disregard setup header of loaded image [ Commit 7e50262229faad0c7b8c54477cd1c883f31cc4a7 upstream ] The native EFI entrypoint does not take a struct boot_params from the loader, but instead, it constructs one from scratch, using the setup header data placed at the start of the image. This setup header is placed in a way that permits legacy loaders to manipulate the contents (i.e., to pass the kernel command line or the address and size of an initial ramdisk), but EFI boot does not use it in that way - it only copies the contents that were placed there at build time, but EFI loaders will not (and should not) manipulate the setup header to configure the boot. (Commit 63bf28ceb3ebbe76 "efi: x86: Wipe setup_data on pure EFI boot" deals with some of the fallout of using setup_data in a way that breaks EFI boot.) Given that none of the non-zero values that are copied from the setup header into the EFI stub's struct boot_params are relevant to the boot now that the EFI stub no longer enters via the legacy decompressor, the copy can be omitted altogether. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-19-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 46 ++++--------------------- 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index dc50dda40239..c592ecd40dab 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -426,9 +426,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle, efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg) { - struct boot_params *boot_params; - struct setup_header *hdr; - void *image_base; + static struct boot_params boot_params __page_aligned_bss; + struct setup_header *hdr = &boot_params.hdr; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; @@ -449,30 +448,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_exit(handle, status); } - image_base = efi_table_attr(image, image_base); - - status = efi_allocate_pages(sizeof(struct boot_params), - (unsigned long *)&boot_params, ULONG_MAX); - if (status != EFI_SUCCESS) { - efi_err("Failed to allocate lowmem for boot params\n"); - efi_exit(handle, status); - } - - memset(boot_params, 0x0, sizeof(struct boot_params)); - - hdr = &boot_params->hdr; - - /* Copy the setup header from the second sector to boot_params */ - memcpy(&hdr->jump, image_base + 512, - sizeof(struct setup_header) - offsetof(struct setup_header, jump)); - - /* - * Fill out some of the header fields ourselves because the - * EFI firmware loader doesn't load the first sector. - */ + /* Assign the setup_header fields that the kernel actually cares about */ hdr->root_flags = 1; hdr->vid_mode = 0xffff; - hdr->boot_flag = 0xAA55; hdr->type_of_loader = 0x21; @@ -481,25 +459,13 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (!cmdline_ptr) goto fail; - efi_set_u64_split((unsigned long)cmdline_ptr, - &hdr->cmd_line_ptr, &boot_params->ext_cmd_line_ptr); + efi_set_u64_split((unsigned long)cmdline_ptr, &hdr->cmd_line_ptr, + &boot_params.ext_cmd_line_ptr); - hdr->ramdisk_image = 0; - hdr->ramdisk_size = 0; - - /* - * Disregard any setup data that was provided by the bootloader: - * setup_data could be pointing anywhere, and we have no way of - * authenticating or validating the payload. - */ - hdr->setup_data = 0; - - efi_stub_entry(handle, sys_table_arg, boot_params); + efi_stub_entry(handle, sys_table_arg, &boot_params); /* not reached */ fail: - efi_free(sizeof(struct boot_params), (unsigned long)boot_params); - efi_exit(handle, status); } From 1800c9628ece13571935d59ce502391c0f86868c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:09 +0200 Subject: [PATCH 305/553] x86/efistub: Reinstate soft limit for initrd loading [ Commit decd347c2a75d32984beb8807d470b763a53b542 upstream ] Commit 8117961d98fb2 ("x86/efi: Disregard setup header of loaded image") dropped the memcopy of the image's setup header into the boot_params struct provided to the core kernel, on the basis that EFI boot does not need it and should rely only on a single protocol to interface with the boot chain. It is also a prerequisite for being able to increase the section alignment to 4k, which is needed to enable memory protections when running in the boot services. So only the setup_header fields that matter to the core kernel are populated explicitly, and everything else is ignored. One thing was overlooked, though: the initrd_addr_max field in the setup_header is not used by the core kernel, but it is used by the EFI stub itself when it loads the initrd, where its default value of INT_MAX is used as the soft limit for memory allocation. This means that, in the old situation, the initrd was virtually always loaded in the lower 2G of memory, but now, due to initrd_addr_max being 0x0, the initrd may end up anywhere in memory. This should not be an issue principle, as most systems can deal with this fine. However, it does appear to tickle some problems in older UEFI implementations, where the memory ends up being corrupted, resulting in errors when unpacking the initramfs. So set the initrd_addr_max field to INT_MAX like it was before. Fixes: 8117961d98fb2 ("x86/efi: Disregard setup header of loaded image") Reported-by: Radek Podgorny Closes: https://lore.kernel.org/all/a99a831a-8ad5-4cb0-bff9-be637311f771@podgorny.cz Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/x86-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index c592ecd40dab..1f5edcb6339a 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -453,6 +453,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, hdr->vid_mode = 0xffff; hdr->type_of_loader = 0x21; + hdr->initrd_addr_max = INT_MAX; /* Convert unicode cmdline to ascii */ cmdline_ptr = efi_convert_cmdline(image, &options_size); From 0db16d1dce52faa2129c49b43d9b0edd9930d08f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:10 +0200 Subject: [PATCH 306/553] x86/efi: Drop alignment flags from PE section headers [ Commit bfab35f552ab3dd6d017165bf9de1d1d20f198cc upstream ] The section header flags for alignment are documented in the PE/COFF spec as being applicable to PE object files only, not to PE executables such as the Linux bzImage, so let's drop them from the PE header. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-20-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index d31982509654..38b611eb1a3c 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -208,8 +208,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_CODE | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE | \ - IMAGE_SCN_ALIGN_16BYTES # Characteristics + IMAGE_SCN_MEM_EXECUTE # Characteristics # # The EFI application loader requires a relocation section @@ -229,8 +228,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE | \ - IMAGE_SCN_ALIGN_1BYTES # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics #ifdef CONFIG_EFI_MIXED # @@ -248,8 +246,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE | \ - IMAGE_SCN_ALIGN_1BYTES # Characteristics + IMAGE_SCN_MEM_DISCARDABLE # Characteristics #endif # @@ -270,8 +267,7 @@ section_table: .word 0 # NumberOfLineNumbers .long IMAGE_SCN_CNT_CODE | \ IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE | \ - IMAGE_SCN_ALIGN_16BYTES # Characteristics + IMAGE_SCN_MEM_EXECUTE # Characteristics .set section_count, (. - section_table) / 40 #endif /* CONFIG_EFI_STUB */ From f31f521ad27e3e476a58c07bf0825c4441317dba Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:11 +0200 Subject: [PATCH 307/553] x86/boot: Remove the 'bugger off' message [ Commit 768171d7ebbce005210e1cf8456f043304805c15 upstream ] Ancient (pre-2003) x86 kernels could boot from a floppy disk straight from the BIOS, using a small real mode boot stub at the start of the image where the BIOS would expect the boot record (or boot block) to appear. Due to its limitations (kernel size < 1 MiB, no support for IDE, USB or El Torito floppy emulation), this support was dropped, and a Linux aware bootloader is now always required to boot the kernel from a legacy BIOS. To smoothen this transition, the boot stub was not removed entirely, but replaced with one that just prints an error message telling the user to install a bootloader. As it is unlikely that anyone doing direct floppy boot with such an ancient kernel is going to upgrade to v6.5+ and expect that this boot method still works, printing this message is kind of pointless, and so it should be possible to remove the logic that emits it. Let's free up this space so it can be used to expand the PE header in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Acked-by: H. Peter Anvin (Intel) Link: https://lore.kernel.org/r/20230912090051.4014114-21-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 49 ------------------------------------------ arch/x86/boot/setup.ld | 7 +++--- 2 files changed, 4 insertions(+), 52 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 38b611eb1a3c..b8d241e57b49 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -38,63 +38,14 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ .code16 .section ".bstext", "ax" - - .global bootsect_start -bootsect_start: #ifdef CONFIG_EFI_STUB # "MZ", MS-DOS header .word MZ_MAGIC -#endif - - # Normalize the start address - ljmp $BOOTSEG, $start2 - -start2: - movw %cs, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - xorw %sp, %sp - sti - cld - - movw $bugger_off_msg, %si - -msg_loop: - lodsb - andb %al, %al - jz bs_die - movb $0xe, %ah - movw $7, %bx - int $0x10 - jmp msg_loop - -bs_die: - # Allow the user to press a key, then reboot - xorw %ax, %ax - int $0x16 - int $0x19 - - # int 0x19 should never return. In case it does anyway, - # invoke the BIOS reset code... - ljmp $0xf000,$0xfff0 - -#ifdef CONFIG_EFI_STUB .org 0x3c # # Offset to the PE header. # .long pe_header -#endif /* CONFIG_EFI_STUB */ - - .section ".bsdata", "a" -bugger_off_msg: - .ascii "Use a boot loader.\r\n" - .ascii "\n" - .ascii "Remove disk and press any key to reboot...\r\n" - .byte 0 - -#ifdef CONFIG_EFI_STUB pe_header: .long PE_MAGIC diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 49546c247ae2..b11c45b9e51e 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -10,10 +10,11 @@ ENTRY(_start) SECTIONS { . = 0; - .bstext : { *(.bstext) } - .bsdata : { *(.bsdata) } + .bstext : { + *(.bstext) + . = 495; + } =0xffffffff - . = 495; .header : { *(.header) } .entrytext : { *(.entrytext) } .inittext : { *(.inittext) } From 5c3e92ad493dd46e0b2345825924d68f5364e93b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:12 +0200 Subject: [PATCH 308/553] x86/boot: Omit compression buffer from PE/COFF image memory footprint [ Commit 8eace5b3555606e684739bef5bcdfcfe68235257 upstream ] Now that the EFI stub decompresses the kernel and hands over to the decompressed image directly, there is no longer a need to provide a decompression buffer as part of the .BSS allocation of the PE/COFF image. It also means the PE/COFF image can be loaded anywhere in memory, and setting the preferred image base is unnecessary. So drop the handling of this from the header and from the build tool. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-22-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 6 ++--- arch/x86/boot/tools/build.c | 50 +++++-------------------------------- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b8d241e57b49..98dd4c36ccca 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -89,12 +89,10 @@ optional_header: #endif extra_header_fields: - # PE specification requires ImageBase to be 64k aligned - .set image_base, (LOAD_PHYSICAL_ADDR + 0xffff) & ~0xffff #ifdef CONFIG_X86_32 - .long image_base # ImageBase + .long 0 # ImageBase #else - .quad image_base # ImageBase + .quad 0 # ImageBase #endif .long 0x20 # SectionAlignment .long 0x20 # FileAlignment diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index bd247692b701..0354c223e354 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -65,7 +65,6 @@ static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long kernel_info; static unsigned long startup_64; -static unsigned long _ehead; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -229,27 +228,14 @@ static void update_pecoff_setup_and_reloc(unsigned int size) #endif } -static void update_pecoff_text(unsigned int text_start, unsigned int file_sz, - unsigned int init_sz) +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) { unsigned int pe_header; unsigned int text_sz = file_sz - text_start; - unsigned int bss_sz = init_sz - file_sz; + unsigned int bss_sz = _end - text_sz; pe_header = get_unaligned_le32(&buf[0x3c]); - /* - * The PE/COFF loader may load the image at an address which is - * misaligned with respect to the kernel_alignment field in the setup - * header. - * - * In order to avoid relocating the kernel to correct the misalignment, - * add slack to allow the buffer to be aligned within the declared size - * of the image. - */ - bss_sz += CONFIG_PHYSICAL_ALIGN; - init_sz += CONFIG_PHYSICAL_ALIGN; - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. @@ -257,7 +243,7 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz, put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]); /* Size of image */ - put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + put_unaligned_le32(file_sz + bss_sz, &buf[pe_header + 0x50]); /* * Address of entry point for PE/COFF executable @@ -308,8 +294,7 @@ static void efi_stub_entry_update(void) static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, - unsigned int file_sz, - unsigned int init_sz) {} + unsigned int file_sz) {} static inline void efi_stub_defaults(void) {} static inline void efi_stub_entry_update(void) {} @@ -360,7 +345,6 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, kernel_info); PARSE_ZOFS(p, startup_64); - PARSE_ZOFS(p, _ehead); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); @@ -371,7 +355,7 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors, init_sz; + unsigned int i, sz, setup_sectors; int c; u32 sys_size; struct stat sb; @@ -442,31 +426,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); - init_sz = get_unaligned_le32(&buf[0x260]); -#ifdef CONFIG_EFI_STUB - /* - * The decompression buffer will start at ImageBase. When relocating - * the compressed kernel to its end, we must ensure that the head - * section does not get overwritten. The head section occupies - * [i, i + _ehead), and the destination is [init_sz - _end, init_sz). - * - * At present these should never overlap, because 'i' is at most 32k - * because of SETUP_SECT_MAX, '_ehead' is less than 1k, and the - * calculation of INIT_SIZE in boot/header.S ensures that - * 'init_sz - _end' is at least 64k. - * - * For future-proofing, increase init_sz if necessary. - */ - - if (init_sz - _end < i + _ehead) { - init_sz = (i + _ehead + _end + 4095) & ~4095; - put_unaligned_le32(init_sz, &buf[0x260]); - } -#endif - update_pecoff_text(setup_sectors * 512, i + (sys_size * 16), init_sz); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); efi_stub_entry_update(); - /* Update kernel_info offset. */ put_unaligned_le32(kernel_info, &buf[0x268]); From 33d38d9b35e98993b11ec607d8a65570cf246d81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:13 +0200 Subject: [PATCH 309/553] x86/boot: Drop redundant code setting the root device [ Commit 7448e8e5d15a3c4df649bf6d6d460f78396f7e1e upstream ] The root device defaults to 0,0 and is no longer configurable at build time [0], so there is no need for the build tool to ever write to this field. [0] 079f85e624189292 ("x86, build: Do not set the root_dev field in bzImage") This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-23-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 98dd4c36ccca..f63bf3ec6869 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -235,7 +235,7 @@ root_flags: .word ROOT_RDONLY syssize: .long 0 /* Filled in by build.c */ ram_size: .word 0 /* Obsolete */ vid_mode: .word SVGA_MODE -root_dev: .word 0 /* Filled in by build.c */ +root_dev: .word 0 /* Default to major/minor 0/0 */ boot_flag: .word 0xAA55 # offset 512, entry point diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 0354c223e354..efa4e9c7d713 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -40,10 +40,6 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; -#define DEFAULT_MAJOR_ROOT 0 -#define DEFAULT_MINOR_ROOT 0 -#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT) - /* Minimal number of setup sectors */ #define SETUP_SECT_MIN 5 #define SETUP_SECT_MAX 64 @@ -399,9 +395,6 @@ int main(int argc, char ** argv) update_pecoff_setup_and_reloc(i); - /* Set the default root device */ - put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); - /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); if (fd < 0) From 67b8dc50890bd1b2a87cc8636672c6186193b17d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:14 +0200 Subject: [PATCH 310/553] x86/boot: Drop references to startup_64 [ Commit b618d31f112bea3d2daea19190d63e567f32a4db upstream ] The x86 boot image generation tool assign a default value to startup_64 and subsequently parses the actual value from zoffset.h but it never actually uses the value anywhere. So remove this code. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230912090051.4014114-25-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/tools/build.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9e38ffaadb5d..10ea28469788 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index efa4e9c7d713..10b0207a6b18 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -60,7 +60,6 @@ static unsigned long efi64_stub_entry; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long kernel_info; -static unsigned long startup_64; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -264,7 +263,6 @@ static void efi_stub_defaults(void) efi_pe_entry = 0x10; #else efi_pe_entry = 0x210; - startup_64 = 0x200; #endif } @@ -340,7 +338,6 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, kernel_info); - PARSE_ZOFS(p, startup_64); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); From e1380c923ccd7f68327372b329dc7c83428405fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:15 +0200 Subject: [PATCH 311/553] x86/boot: Grab kernel_info offset from zoffset header directly [ Commit 2e765c02dcbfc2a8a4527c621a84b9502f6b9bd2 upstream ] Instead of parsing zoffset.h and poking the kernel_info offset value into the header from the build tool, just grab the value directly in the asm file that describes this header. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-11-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f63bf3ec6869..becf39d8115c 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -525,7 +525,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -kernel_info_offset: .long 0 # Filled in by build.c +kernel_info_offset: .long ZO_kernel_info # End of setup header ##################################################### diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 10b0207a6b18..14ef13fe7ab0 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -59,7 +59,6 @@ static unsigned long efi32_stub_entry; static unsigned long efi64_stub_entry; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; -static unsigned long kernel_info; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -337,7 +336,6 @@ static void parse_zoffset(char *fname) PARSE_ZOFS(p, efi64_stub_entry); PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); - PARSE_ZOFS(p, kernel_info); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); @@ -419,8 +417,6 @@ int main(int argc, char ** argv) update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); efi_stub_entry_update(); - /* Update kernel_info offset. */ - put_unaligned_le32(kernel_info, &buf[0x268]); crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) From f9d68334dd9913f4eb7b49b298473e4d60f4fce3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:16 +0200 Subject: [PATCH 312/553] x86/boot: Set EFI handover offset directly in header asm [ Commit eac956345f99dda3d68f4ae6cf7b494105e54780 upstream ] The offsets of the EFI handover entrypoints are available to the assembler when constructing the header, so there is no need to set them from the build tool afterwards. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-12-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 18 +++++++++++++++++- arch/x86/boot/tools/build.c | 24 ------------------------ 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index becf39d8115c..34ab46b891e3 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -523,8 +523,24 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # define INIT_SIZE VO_INIT_SIZE #endif + .macro __handover_offset +#ifndef CONFIG_EFI_HANDOVER_PROTOCOL + .long 0 +#elif !defined(CONFIG_X86_64) + .long ZO_efi32_stub_entry +#else + /* Yes, this is really how we defined it :( */ + .long ZO_efi64_stub_entry - 0x200 +#ifdef CONFIG_EFI_MIXED + .if ZO_efi32_stub_entry != ZO_efi64_stub_entry - 0x200 + .error "32-bit and 64-bit EFI entry points do not match" + .endif +#endif +#endif + .endm + init_size: .long INIT_SIZE # kernel initialization size -handover_offset: .long 0 # Filled in by build.c +handover_offset: __handover_offset kernel_info_offset: .long ZO_kernel_info # End of setup header ##################################################### diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 14ef13fe7ab0..069497543164 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -55,8 +55,6 @@ u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_COMPAT_RESERVE 0x0 #endif -static unsigned long efi32_stub_entry; -static unsigned long efi64_stub_entry; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long _end; @@ -265,31 +263,12 @@ static void efi_stub_defaults(void) #endif } -static void efi_stub_entry_update(void) -{ - unsigned long addr = efi32_stub_entry; - -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL -#ifdef CONFIG_X86_64 - /* Yes, this is really how we defined it :( */ - addr = efi64_stub_entry - 0x200; -#endif - -#ifdef CONFIG_EFI_MIXED - if (efi32_stub_entry != addr) - die("32-bit and 64-bit EFI entry points do not match\n"); -#endif -#endif - put_unaligned_le32(addr, &buf[0x264]); -} - #else static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, unsigned int file_sz) {} static inline void efi_stub_defaults(void) {} -static inline void efi_stub_entry_update(void) {} static inline int reserve_pecoff_reloc_section(int c) { @@ -332,8 +311,6 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi32_stub_entry); - PARSE_ZOFS(p, efi64_stub_entry); PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, _end); @@ -416,7 +393,6 @@ int main(int argc, char ** argv) update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); - efi_stub_entry_update(); crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) From 1c754c6ec978354afa44977a8183c0656815cf03 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:17 +0200 Subject: [PATCH 313/553] x86/boot: Define setup size in linker script [ Commit 093ab258e3fb1d1d3afdfd4a69403d44ce90e360 upstream ] The setup block contains the real mode startup code that is used when booting from a legacy BIOS, along with the boot_params/setup_data that is used by legacy x86 bootloaders to pass the command line and initial ramdisk parameters, among other things. The setup block also contains the PE/COFF header of the entire combined image, which includes the compressed kernel image, the decompressor and the EFI stub. This PE header describes the layout of the executable image in memory, and currently, the fact that the setup block precedes it makes it rather fiddly to get the right values into the right place in the final image. Let's make things a bit easier by defining the setup_size in the linker script so it can be referenced from the asm code directly, rather than having to rely on the build tool to calculate it. For the time being, add 64 bytes of fixed padding for the .reloc and .compat sections - this will be removed in a subsequent patch after the PE/COFF header has been reorganized. This change has no impact on the resulting bzImage binary when configured with CONFIG_EFI_MIXED=y. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-13-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 2 +- arch/x86/boot/setup.ld | 4 ++++ arch/x86/boot/tools/build.c | 6 ------ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 34ab46b891e3..6dddf469ca60 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -230,7 +230,7 @@ sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ .globl hdr hdr: -setup_sects: .byte 0 /* Filled in by build.c */ + .byte setup_sects - 1 root_flags: .word ROOT_RDONLY syssize: .long 0 /* Filled in by build.c */ ram_size: .word 0 /* Obsolete */ diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index b11c45b9e51e..9bd5c1ada599 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -39,6 +39,10 @@ SECTIONS .signature : { setup_sig = .; LONG(0x5a5aaa55) + + /* Reserve some extra space for the reloc and compat sections */ + setup_size = ALIGN(ABSOLUTE(.) + 64, 512); + setup_sects = ABSOLUTE(setup_size / 512); } diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 069497543164..745d64b6d930 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -48,12 +48,7 @@ typedef unsigned int u32; u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_RELOC_RESERVE 0x20 - -#ifdef CONFIG_EFI_MIXED #define PECOFF_COMPAT_RESERVE 0x20 -#else -#define PECOFF_COMPAT_RESERVE 0x0 -#endif static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; @@ -388,7 +383,6 @@ int main(int argc, char ** argv) #endif /* Patch the setup code with the appropriate size parameters */ - buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); From 1fa0a2147590930f50b9098a9d81f60c6d096d9e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:18 +0200 Subject: [PATCH 314/553] x86/boot: Derive file size from _edata symbol [ Commit aeb92067f6ae994b541d7f9752fe54ed3d108bcc upstream ] Tweak the linker script so that the value of _edata represents the decompressor binary's file size rounded up to the appropriate alignment. This removes the need to calculate it in the build tool, and will make it easier to refer to the file size from the header directly in subsequent changes to the PE header layout. While adding _edata to the sed regex that parses the compressed vmlinux's symbol list, tweak the regex a bit for conciseness. This change has no impact on the resulting bzImage binary when configured with CONFIG_EFI_STUB=y. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-14-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/compressed/vmlinux.lds.S | 3 +++ arch/x86/boot/header.S | 2 +- arch/x86/boot/tools/build.c | 30 ++++++-------------------- 4 files changed, 12 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 10ea28469788..b92e00836f69 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|efi32_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_edata\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 32892e81bf61..aa7e9848cd8f 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -46,6 +46,9 @@ SECTIONS _data = . ; *(.data) *(.data.*) + + /* Add 4 bytes of extra space for a CRC-32 checksum */ + . = ALIGN(. + 4, 0x20); _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6dddf469ca60..b43b55130855 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -232,7 +232,7 @@ sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */ hdr: .byte setup_sects - 1 root_flags: .word ROOT_RDONLY -syssize: .long 0 /* Filled in by build.c */ +syssize: .long ZO__edata / 16 ram_size: .word 0 /* Obsolete */ vid_mode: .word SVGA_MODE root_dev: .word 0 /* Default to major/minor 0/0 */ diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 745d64b6d930..e792c6c5a634 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -52,6 +52,7 @@ u8 buf[SETUP_SECT_MAX*512]; static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; +static unsigned long _edata; static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -308,6 +309,7 @@ static void parse_zoffset(char *fname) while (p && *p) { PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); + PARSE_ZOFS(p, _edata); PARSE_ZOFS(p, _end); p = strchr(p, '\n'); @@ -320,7 +322,6 @@ int main(int argc, char ** argv) { unsigned int i, sz, setup_sectors; int c; - u32 sys_size; struct stat sb; FILE *file, *dest; int fd; @@ -368,24 +369,14 @@ int main(int argc, char ** argv) die("Unable to open `%s': %m", argv[2]); if (fstat(fd, &sb)) die("Unable to stat `%s': %m", argv[2]); - sz = sb.st_size; + if (_edata != sb.st_size) + die("Unexpected file size `%s': %u != %u", argv[2], _edata, + sb.st_size); + sz = _edata - 4; kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); if (kernel == MAP_FAILED) die("Unable to mmap '%s': %m", argv[2]); - /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ - sys_size = (sz + 15 + 4) / 16; -#ifdef CONFIG_EFI_STUB - /* - * COFF requires minimum 32-byte alignment of sections, and - * adding a signature is problematic without that alignment. - */ - sys_size = (sys_size + 1) & ~1; -#endif - - /* Patch the setup code with the appropriate size parameters */ - put_unaligned_le32(sys_size, &buf[0x1f4]); - - update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + update_pecoff_text(setup_sectors * 512, i + _edata); crc = partial_crc32(buf, i, crc); @@ -397,13 +388,6 @@ int main(int argc, char ** argv) if (fwrite(kernel, 1, sz, dest) != sz) die("Writing kernel failed"); - /* Add padding leaving 4 bytes for the checksum */ - while (sz++ < (sys_size*16) - 4) { - crc = partial_crc32_one('\0', crc); - if (fwrite("\0", 1, 1, dest) != 1) - die("Writing padding failed"); - } - /* Write the CRC */ put_unaligned_le32(crc, buf); if (fwrite(buf, 1, 4, dest) != 4) From d03399c3e2526e40ffa69c4c06be71e1a09e2361 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:19 +0200 Subject: [PATCH 315/553] x86/boot: Construct PE/COFF .text section from assembler [ Commit efa089e63b56bdc5eca754b995cb039dd7a5457e upstream ] Now that the size of the setup block is visible to the assembler, it is possible to populate the PE/COFF header fields from the asm code directly, instead of poking the values into the binary using the build tool. This will make it easier to reorganize the section layout without having to tweak the build tool in lockstep. This change has no impact on the resulting bzImage binary. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-15-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 22 ++++++----------- arch/x86/boot/tools/build.c | 47 ------------------------------------- 2 files changed, 7 insertions(+), 62 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b43b55130855..f8f609fb8709 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -74,14 +74,12 @@ optional_header: .byte 0x02 # MajorLinkerVersion .byte 0x14 # MinorLinkerVersion - # Filled in by build.c - .long 0 # SizeOfCode + .long setup_size + ZO__end - 0x200 # SizeOfCode .long 0 # SizeOfInitializedData .long 0 # SizeOfUninitializedData - # Filled in by build.c - .long 0x0000 # AddressOfEntryPoint + .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint .long 0x0200 # BaseOfCode #ifdef CONFIG_X86_32 @@ -104,10 +102,7 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - # - # The size of the bzImage is written in tools/build.c - # - .long 0 # SizeOfImage + .long setup_size + ZO__end # SizeOfImage .long 0x200 # SizeOfHeaders .long 0 # CheckSum @@ -198,18 +193,15 @@ section_table: IMAGE_SCN_MEM_DISCARDABLE # Characteristics #endif - # - # The offset & size fields are filled in by build.c. - # .ascii ".text" .byte 0 .byte 0 .byte 0 - .long 0 - .long 0x0 # startup_{32,64} - .long 0 # Size of initialized data + .long ZO__end + .long setup_size + .long ZO__edata # Size of initialized data # on disk - .long 0x0 # startup_{32,64} + .long setup_size .long 0 # PointerToRelocations .long 0 # PointerToLineNumbers .word 0 # NumberOfRelocations diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index e792c6c5a634..9712f27e32c1 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,10 +50,8 @@ u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_RELOC_RESERVE 0x20 #define PECOFF_COMPAT_RESERVE 0x20 -static unsigned long efi_pe_entry; static unsigned long efi32_pe_entry; static unsigned long _edata; -static unsigned long _end; /*----------------------------------------------------------------------*/ @@ -216,32 +214,6 @@ static void update_pecoff_setup_and_reloc(unsigned int size) #endif } -static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) -{ - unsigned int pe_header; - unsigned int text_sz = file_sz - text_start; - unsigned int bss_sz = _end - text_sz; - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* - * Size of code: Subtract the size of the first sector (512 bytes) - * which includes the header. - */ - put_unaligned_le32(file_sz - 512 + bss_sz, &buf[pe_header + 0x1c]); - - /* Size of image */ - put_unaligned_le32(file_sz + bss_sz, &buf[pe_header + 0x50]); - - /* - * Address of entry point for PE/COFF executable - */ - put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]); - - update_pecoff_section_header_fields(".text", text_start, text_sz + bss_sz, - text_sz, text_start); -} - static int reserve_pecoff_reloc_section(int c) { /* Reserve 0x20 bytes for .reloc section */ @@ -249,22 +221,9 @@ static int reserve_pecoff_reloc_section(int c) return PECOFF_RELOC_RESERVE; } -static void efi_stub_defaults(void) -{ - /* Defaults for old kernel */ -#ifdef CONFIG_X86_32 - efi_pe_entry = 0x10; -#else - efi_pe_entry = 0x210; -#endif -} - #else static inline void update_pecoff_setup_and_reloc(unsigned int size) {} -static inline void update_pecoff_text(unsigned int text_start, - unsigned int file_sz) {} -static inline void efi_stub_defaults(void) {} static inline int reserve_pecoff_reloc_section(int c) { @@ -307,10 +266,8 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, _edata); - PARSE_ZOFS(p, _end); p = strchr(p, '\n'); while (p && (*p == '\r' || *p == '\n')) @@ -328,8 +285,6 @@ int main(int argc, char ** argv) void *kernel; u32 crc = 0xffffffffUL; - efi_stub_defaults(); - if (argc != 5) usage(); parse_zoffset(argv[3]); @@ -376,8 +331,6 @@ int main(int argc, char ** argv) kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); if (kernel == MAP_FAILED) die("Unable to mmap '%s': %m", argv[2]); - update_pecoff_text(setup_sectors * 512, i + _edata); - crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) From 43b1920588fa7852ced850c8351f10bdde393140 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:20 +0200 Subject: [PATCH 316/553] x86/boot: Drop PE/COFF .reloc section [ Commit fa5750521e0a4efbc1af05223da9c4bbd6c21c83 upstream ] Ancient buggy EFI loaders may have required a .reloc section to be present at some point in time, but this has not been true for a long time so the .reloc section can just be dropped. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-16-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 20 -------------------- arch/x86/boot/setup.ld | 4 ++-- arch/x86/boot/tools/build.c | 34 +++++----------------------------- 3 files changed, 7 insertions(+), 51 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f8f609fb8709..a01e55ce506f 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -154,26 +154,6 @@ section_table: IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_EXECUTE # Characteristics - # - # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. The .reloc - # offset & size fields are filled in by build.c. - # - .ascii ".reloc" - .byte 0 - .byte 0 - .long 0 - .long 0 - .long 0 # SizeOfRawData - .long 0 # PointerToRawData - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers - .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_DISCARDABLE # Characteristics - #ifdef CONFIG_EFI_MIXED # # The offset & size fields are filled in by build.c. diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 9bd5c1ada599..6d389499565c 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -40,8 +40,8 @@ SECTIONS setup_sig = .; LONG(0x5a5aaa55) - /* Reserve some extra space for the reloc and compat sections */ - setup_size = ALIGN(ABSOLUTE(.) + 64, 512); + /* Reserve some extra space for the compat section */ + setup_size = ALIGN(ABSOLUTE(.) + 32, 512); setup_sects = ABSOLUTE(setup_size / 512); } diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 9712f27e32c1..faccff9743a3 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -47,7 +47,6 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -#define PECOFF_RELOC_RESERVE 0x20 #define PECOFF_COMPAT_RESERVE 0x20 static unsigned long efi32_pe_entry; @@ -180,24 +179,13 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz update_pecoff_section_header_fields(section_name, offset, size, size, offset); } -static void update_pecoff_setup_and_reloc(unsigned int size) +static void update_pecoff_setup(unsigned int size) { u32 setup_offset = 0x200; - u32 reloc_offset = size - PECOFF_RELOC_RESERVE - PECOFF_COMPAT_RESERVE; -#ifdef CONFIG_EFI_MIXED - u32 compat_offset = reloc_offset + PECOFF_RELOC_RESERVE; -#endif - u32 setup_size = reloc_offset - setup_offset; + u32 compat_offset = size - PECOFF_COMPAT_RESERVE; + u32 setup_size = compat_offset - setup_offset; update_pecoff_section_header(".setup", setup_offset, setup_size); - update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); - - /* - * Modify .reloc section contents with a single entry. The - * relocation is applied to offset 10 of the relocation section. - */ - put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); - put_unaligned_le32(10, &buf[reloc_offset + 4]); #ifdef CONFIG_EFI_MIXED update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE); @@ -214,21 +202,10 @@ static void update_pecoff_setup_and_reloc(unsigned int size) #endif } -static int reserve_pecoff_reloc_section(int c) -{ - /* Reserve 0x20 bytes for .reloc section */ - memset(buf+c, 0, PECOFF_RELOC_RESERVE); - return PECOFF_RELOC_RESERVE; -} - #else -static inline void update_pecoff_setup_and_reloc(unsigned int size) {} +static inline void update_pecoff_setup(unsigned int size) {} -static inline int reserve_pecoff_reloc_section(int c) -{ - return 0; -} #endif /* CONFIG_EFI_STUB */ static int reserve_pecoff_compat_section(int c) @@ -307,7 +284,6 @@ int main(int argc, char ** argv) fclose(file); c += reserve_pecoff_compat_section(c); - c += reserve_pecoff_reloc_section(c); /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; @@ -316,7 +292,7 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); - update_pecoff_setup_and_reloc(i); + update_pecoff_setup(i); /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); From 581f5d5e02effba6e6be2c2d9df6dc62ad839760 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:21 +0200 Subject: [PATCH 317/553] x86/boot: Split off PE/COFF .data section [ Commit 34951f3c28bdf6481d949a20413b2ce7693687b2 upstream ] Describe the code and data of the decompressor binary using separate .text and .data PE/COFF sections, so that we will be able to map them using restricted permissions once we increase the section and file alignment sufficiently. This avoids the need for memory mappings that are writable and executable at the same time, which is something that is best avoided for security reasons. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-17-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/header.S | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b92e00836f69..3c1b3520361c 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -91,7 +91,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_edata\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|efi.._stub_entry\|efi\(32\)\?_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|_e\?data\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index a01e55ce506f..178252cdccf5 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -74,9 +74,9 @@ optional_header: .byte 0x02 # MajorLinkerVersion .byte 0x14 # MinorLinkerVersion - .long setup_size + ZO__end - 0x200 # SizeOfCode + .long ZO__data # SizeOfCode - .long 0 # SizeOfInitializedData + .long ZO__end - ZO__data # SizeOfInitializedData .long 0 # SizeOfUninitializedData .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint @@ -177,9 +177,9 @@ section_table: .byte 0 .byte 0 .byte 0 - .long ZO__end + .long ZO__data .long setup_size - .long ZO__edata # Size of initialized data + .long ZO__data # Size of initialized data # on disk .long setup_size .long 0 # PointerToRelocations @@ -190,6 +190,17 @@ section_table: IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_EXECUTE # Characteristics + .ascii ".data\0\0\0" + .long ZO__end - ZO__data # VirtualSize + .long setup_size + ZO__data # VirtualAddress + .long ZO__edata - ZO__data # SizeOfRawData + .long setup_size + ZO__data # PointerToRawData + + .long 0, 0, 0 + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE # Characteristics + .set section_count, (. - section_table) / 40 #endif /* CONFIG_EFI_STUB */ From c4421279b6c278efe129bde7abc64af59ea2dfbd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:22 +0200 Subject: [PATCH 318/553] x86/boot: Increase section and file alignment to 4k/512 [ Commit 3e3eabe26dc88692d34cf76ca0e0dd331481cc15 upstream ] Align x86 with other EFI architectures, and increase the section alignment to the EFI page size (4k), so that firmware is able to honour the section permission attributes and map code read-only and data non-executable. There are a number of requirements that have to be taken into account: - the sign tools get cranky when there are gaps between sections in the file view of the image - the virtual offset of each section must be aligned to the image's section alignment - the file offset *and size* of each section must be aligned to the image's file alignment - the image size must be aligned to the section alignment - each section's virtual offset must be greater than or equal to the size of the headers. In order to meet all these requirements, while avoiding the need for lots of padding to accommodate the .compat section, the latter is placed at an arbitrary offset towards the end of the image, but aligned to the minimum file alignment (512 bytes). The space before the .text section is therefore distributed between the PE header, the .setup section and the .compat section, leaving no gaps in the file coverage, making the signing tools happy. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230915171623.655440-18-ardb@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/vmlinux.lds.S | 4 +- arch/x86/boot/header.S | 81 +++++++++++++---------- arch/x86/boot/setup.ld | 7 +- arch/x86/boot/tools/build.c | 90 +------------------------- 4 files changed, 54 insertions(+), 128 deletions(-) diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index aa7e9848cd8f..bcf0e4e4c98e 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -42,13 +42,13 @@ SECTIONS *(.rodata.*) _erodata = . ; } - .data : { + .data : ALIGN(0x1000) { _data = . ; *(.data) *(.data.*) /* Add 4 bytes of extra space for a CRC-32 checksum */ - . = ALIGN(. + 4, 0x20); + . = ALIGN(. + 4, 0x200); _edata = . ; } . = ALIGN(L1_CACHE_BYTES); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 178252cdccf5..6264bbf54fbc 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -36,6 +36,9 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define ROOT_RDONLY 1 #endif + .set salign, 0x1000 + .set falign, 0x200 + .code16 .section ".bstext", "ax" #ifdef CONFIG_EFI_STUB @@ -81,7 +84,7 @@ optional_header: .long setup_size + ZO_efi_pe_entry # AddressOfEntryPoint - .long 0x0200 # BaseOfCode + .long setup_size # BaseOfCode #ifdef CONFIG_X86_32 .long 0 # data #endif @@ -92,8 +95,8 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x20 # SectionAlignment - .long 0x20 # FileAlignment + .long salign # SectionAlignment + .long falign # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word LINUX_EFISTUB_MAJOR_VERSION # MajorImageVersion @@ -102,9 +105,10 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - .long setup_size + ZO__end # SizeOfImage + .long setup_size + ZO__end + pecompat_vsize + # SizeOfImage - .long 0x200 # SizeOfHeaders + .long salign # SizeOfHeaders .long 0 # CheckSum .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application) #ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES @@ -135,44 +139,51 @@ extra_header_fields: # Section table section_table: - # - # The offset & size fields are filled in by build.c. - # .ascii ".setup" .byte 0 .byte 0 - .long 0 - .long 0x0 # startup_{32,64} - .long 0 # Size of initialized data - # on disk - .long 0x0 # startup_{32,64} - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE # Characteristics + .long setup_size - salign # VirtualSize + .long salign # VirtualAddress + .long pecompat_fstart - salign # SizeOfRawData + .long salign # PointerToRawData -#ifdef CONFIG_EFI_MIXED - # - # The offset & size fields are filled in by build.c. - # - .asciz ".compat" - .long 0 - .long 0x0 - .long 0 # Size of initialized data - # on disk - .long 0x0 - .long 0 # PointerToRelocations - .long 0 # PointerToLineNumbers - .word 0 # NumberOfRelocations - .word 0 # NumberOfLineNumbers + .long 0, 0, 0 .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ IMAGE_SCN_MEM_READ | \ IMAGE_SCN_MEM_DISCARDABLE # Characteristics -#endif +#ifdef CONFIG_EFI_MIXED + .asciz ".compat" + + .long 8 # VirtualSize + .long setup_size + ZO__end # VirtualAddress + .long pecompat_fsize # SizeOfRawData + .long pecompat_fstart # PointerToRawData + + .long 0, 0, 0 + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_DISCARDABLE # Characteristics + + /* + * Put the IA-32 machine type and the associated entry point address in + * the .compat section, so loaders can figure out which other execution + * modes this image supports. + */ + .pushsection ".pecompat", "a", @progbits + .balign falign + .set pecompat_vsize, salign + .globl pecompat_fstart +pecompat_fstart: + .byte 0x1 # Version + .byte 8 # Size + .word IMAGE_FILE_MACHINE_I386 # PE machine type + .long setup_size + ZO_efi32_pe_entry # Entrypoint + .popsection +#else + .set pecompat_vsize, 0 + .set pecompat_fstart, setup_size +#endif .ascii ".text" .byte 0 .byte 0 diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 6d389499565c..83bb7efad8ae 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -36,16 +36,17 @@ SECTIONS . = ALIGN(16); .data : { *(.data*) } + .pecompat : { *(.pecompat) } + PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); + .signature : { setup_sig = .; LONG(0x5a5aaa55) - /* Reserve some extra space for the compat section */ - setup_size = ALIGN(ABSOLUTE(.) + 32, 512); + setup_size = ALIGN(ABSOLUTE(.), 4096); setup_sects = ABSOLUTE(setup_size / 512); } - . = ALIGN(16); .bss : { diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index faccff9743a3..10311d77c67f 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -47,9 +47,6 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -#define PECOFF_COMPAT_RESERVE 0x20 - -static unsigned long efi32_pe_entry; static unsigned long _edata; /*----------------------------------------------------------------------*/ @@ -136,85 +133,6 @@ static void usage(void) die("Usage: build setup system zoffset.h image"); } -#ifdef CONFIG_EFI_STUB - -static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) -{ - unsigned int pe_header; - unsigned short num_sections; - u8 *section; - - pe_header = get_unaligned_le32(&buf[0x3c]); - num_sections = get_unaligned_le16(&buf[pe_header + 6]); - -#ifdef CONFIG_X86_32 - section = &buf[pe_header + 0xa8]; -#else - section = &buf[pe_header + 0xb8]; -#endif - - while (num_sections > 0) { - if (strncmp((char*)section, section_name, 8) == 0) { - /* section header size field */ - put_unaligned_le32(size, section + 0x8); - - /* section header vma field */ - put_unaligned_le32(vma, section + 0xc); - - /* section header 'size of initialised data' field */ - put_unaligned_le32(datasz, section + 0x10); - - /* section header 'file offset' field */ - put_unaligned_le32(offset, section + 0x14); - - break; - } - section += 0x28; - num_sections--; - } -} - -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) -{ - update_pecoff_section_header_fields(section_name, offset, size, size, offset); -} - -static void update_pecoff_setup(unsigned int size) -{ - u32 setup_offset = 0x200; - u32 compat_offset = size - PECOFF_COMPAT_RESERVE; - u32 setup_size = compat_offset - setup_offset; - - update_pecoff_section_header(".setup", setup_offset, setup_size); - -#ifdef CONFIG_EFI_MIXED - update_pecoff_section_header(".compat", compat_offset, PECOFF_COMPAT_RESERVE); - - /* - * Put the IA-32 machine type (0x14c) and the associated entry point - * address in the .compat section, so loaders can figure out which other - * execution modes this image supports. - */ - buf[compat_offset] = 0x1; - buf[compat_offset + 1] = 0x8; - put_unaligned_le16(0x14c, &buf[compat_offset + 2]); - put_unaligned_le32(efi32_pe_entry + size, &buf[compat_offset + 4]); -#endif -} - -#else - -static inline void update_pecoff_setup(unsigned int size) {} - -#endif /* CONFIG_EFI_STUB */ - -static int reserve_pecoff_compat_section(int c) -{ - /* Reserve 0x20 bytes for .compat section */ - memset(buf+c, 0, PECOFF_COMPAT_RESERVE); - return PECOFF_COMPAT_RESERVE; -} - /* * Parse zoffset.h and find the entry points. We could just #include zoffset.h * but that would mean tools/build would have to be rebuilt every time. It's @@ -243,7 +161,6 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi32_pe_entry); PARSE_ZOFS(p, _edata); p = strchr(p, '\n'); @@ -283,17 +200,14 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); - c += reserve_pecoff_compat_section(c); - /* Pad unused space with zeros */ - setup_sectors = (c + 511) / 512; + setup_sectors = (c + 4095) / 4096; + setup_sectors *= 8; if (setup_sectors < SETUP_SECT_MIN) setup_sectors = SETUP_SECT_MIN; i = setup_sectors*512; memset(buf+c, 0, i-c); - update_pecoff_setup(i); - /* Open and stat the kernel file */ fd = open(argv[2], O_RDONLY); if (fd < 0) From d327e961573fc335af0ae8a160302205327e1f4e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:23 +0200 Subject: [PATCH 319/553] x86/efistub: Use 1:1 file:memory mapping for PE/COFF .compat section [ Commit 1ad55cecf22f05f1c884adf63cc09d3c3e609ebf upstream ] The .compat section is a dummy PE section that contains the address of the 32-bit entrypoint of the 64-bit kernel image if it is bootable from 32-bit firmware (i.e., CONFIG_EFI_MIXED=y) This section is only 8 bytes in size and is only referenced from the loader, and so it is placed at the end of the memory view of the image, to avoid the need for padding it to 4k, which is required for sections appearing in the middle of the image. Unfortunately, this violates the PE/COFF spec, and even if most EFI loaders will work correctly (including the Tianocore reference implementation), PE loaders do exist that reject such images, on the basis that both the file and memory views of the file contents should be described by the section headers in a monotonically increasing manner without leaving any gaps. So reorganize the sections to avoid this issue. This results in a slight padding overhead (< 4k) which can be avoided if desired by disabling CONFIG_EFI_MIXED (which is only needed in rare cases these days) Fixes: 3e3eabe26dc8 ("x86/boot: Increase section and file alignment to 4k/512") Reported-by: Mike Beaton Link: https://lkml.kernel.org/r/CAHzAAWQ6srV6LVNdmfbJhOwhBw5ZzxxZZ07aHt9oKkfYAdvuQQ%40mail.gmail.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/header.S | 14 ++++++-------- arch/x86/boot/setup.ld | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6264bbf54fbc..7593339b529a 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -105,8 +105,7 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - .long setup_size + ZO__end + pecompat_vsize - # SizeOfImage + .long setup_size + ZO__end # SizeOfImage .long salign # SizeOfHeaders .long 0 # CheckSum @@ -142,7 +141,7 @@ section_table: .ascii ".setup" .byte 0 .byte 0 - .long setup_size - salign # VirtualSize + .long pecompat_fstart - salign # VirtualSize .long salign # VirtualAddress .long pecompat_fstart - salign # SizeOfRawData .long salign # PointerToRawData @@ -155,8 +154,8 @@ section_table: #ifdef CONFIG_EFI_MIXED .asciz ".compat" - .long 8 # VirtualSize - .long setup_size + ZO__end # VirtualAddress + .long pecompat_fsize # VirtualSize + .long pecompat_fstart # VirtualAddress .long pecompat_fsize # SizeOfRawData .long pecompat_fstart # PointerToRawData @@ -171,17 +170,16 @@ section_table: * modes this image supports. */ .pushsection ".pecompat", "a", @progbits - .balign falign - .set pecompat_vsize, salign + .balign salign .globl pecompat_fstart pecompat_fstart: .byte 0x1 # Version .byte 8 # Size .word IMAGE_FILE_MACHINE_I386 # PE machine type .long setup_size + ZO_efi32_pe_entry # Entrypoint + .byte 0x0 # Sentinel .popsection #else - .set pecompat_vsize, 0 .set pecompat_fstart, setup_size #endif .ascii ".text" diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 83bb7efad8ae..3a2d1360abb0 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -24,6 +24,9 @@ SECTIONS .text : { *(.text .text.*) } .text32 : { *(.text32) } + .pecompat : { *(.pecompat) } + PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); + . = ALIGN(16); .rodata : { *(.rodata*) } @@ -36,9 +39,6 @@ SECTIONS . = ALIGN(16); .data : { *(.data*) } - .pecompat : { *(.pecompat) } - PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); - .signature : { setup_sig = .; LONG(0x5a5aaa55) From 20dc656b06570a29274cbc669fa9356ccae24f91 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 19 Apr 2024 10:11:24 +0200 Subject: [PATCH 320/553] x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros [ Commit 82328227db8f0b9b5f77bb5afcd47e59d0e4d08f upstream ] Other architectures and the common mm/ use P*D_MASK, and P*D_SIZE. Remove the duplicated P*D_PAGE_MASK and P*D_PAGE_SIZE which are only used in x86/*. Signed-off-by: Pasha Tatashin Signed-off-by: Borislav Petkov Reviewed-by: Anshuman Khandual Acked-by: Mike Rapoport Link: https://lore.kernel.org/r/20220516185202.604654-1-tatashin@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/page_types.h | 12 +++--------- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/mm/mem_encrypt_boot.S | 4 ++-- arch/x86/mm/mem_encrypt_identity.c | 18 +++++++++--------- arch/x86/mm/pat/set_memory.c | 6 +++--- arch/x86/mm/pti.c | 2 +- 7 files changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index a506a411474d..86bd4311daf8 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -11,20 +11,14 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 19a0207e529f..56a917df410d 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_SIZE) - a; if (iommu_size < 64*1024*1024) { pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 84adf12a76d3..34580e1a4cdb 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr, load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); /* Is the address not 2M aligned? */ - if (load_delta & ~PMD_PAGE_MASK) + if (load_delta & ~PMD_MASK) for (;;); /* Include the SME encryption mask in the fixup value */ diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 9de3d900bc92..e25288ee33c2 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute) * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) - * - intermediate copy buffer (PMD_PAGE_SIZE) + * - intermediate copy buffer (PMD_SIZE) * R8 - physical address of the pagetables to use for encryption */ @@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy) wbinvd /* Invalidate any cache entries */ /* Copy/encrypt up to 2MB at a time */ - movq $PMD_PAGE_SIZE, %r12 + movq $PMD_SIZE, %r12 1: cmpq %r12, %r9 jnb 2f diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 06ccbd36e8dc..f7dfc3f89921 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -93,7 +93,7 @@ struct sme_populate_pgd_data { * section is 2MB aligned to allow for simple pagetable setup using only * PMD entries (see vmlinux.lds.S). */ -static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); +static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; @@ -197,8 +197,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); - ppd->vaddr += PMD_PAGE_SIZE; - ppd->paddr += PMD_PAGE_SIZE; + ppd->vaddr += PMD_SIZE; + ppd->paddr += PMD_SIZE; } } @@ -224,11 +224,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, vaddr_end = ppd->vaddr_end; /* If start is not 2MB aligned, create PTE entries */ - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE); __sme_map_range_pte(ppd); /* Create PMD entries */ - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + ppd->vaddr_end = vaddr_end & PMD_MASK; __sme_map_range_pmd(ppd); /* If end is not 2MB aligned, create PTE entries */ @@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) /* Physical addresses gives us the identity mapped virtual addresses */ kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); + kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * executable encryption area size: * stack page (PAGE_SIZE) * encryption routine page (PAGE_SIZE) - * intermediate copy buffer (PMD_PAGE_SIZE) + * intermediate copy buffer (PMD_SIZE) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ execute_start = workarea_start; - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; /* @@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); + workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE); /* * Set the address to the start of where newly created pagetable diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 5f0ce77a259d..68d4f328f169 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -747,11 +747,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) switch (level) { case PG_LEVEL_1G: phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PUD_PAGE_MASK; + offset = virt_addr & ~PUD_MASK; break; case PG_LEVEL_2M: phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PMD_PAGE_MASK; + offset = virt_addr & ~PMD_MASK; break; default: phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; @@ -1041,7 +1041,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, case PG_LEVEL_1G: ref_prot = pud_pgprot(*(pud_t *)kpte); ref_pfn = pud_pfn(*(pud_t *)kpte); - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pfninc = PMD_SIZE >> PAGE_SHIFT; lpaddr = address & PUD_MASK; lpinc = PMD_SIZE; /* diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ffe3b3a087fe..78414c6d1b5e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void) * of the image. */ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE); /* * This clears _PAGE_GLOBAL from the entire kernel image. From bbcd0534a3eb7fcedd3e7e56eaf6dc837d252776 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Fri, 19 Apr 2024 10:11:25 +0200 Subject: [PATCH 321/553] x86/head/64: Add missing __head annotation to startup_64_load_idt() [ Commit 7f6874eddd81cb2ed784642a7a4321671e158ffe upstream ] This function is currently only used in the head code and is only called from startup_64_setup_env(). Although it would be inlined by the compiler, it would be better to mark it as __head too in case it doesn't. Signed-off-by: Hou Wenlong Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/efcc5b5e18af880e415d884e072bf651c1fa7c34.1689130310.git.houwenlong.hwl@antgroup.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 34580e1a4cdb..78f3f6756538 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -588,7 +588,7 @@ static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler) } /* This runs while still in the direct mapping */ -static void startup_64_load_idt(unsigned long physbase) +static void __head startup_64_load_idt(unsigned long physbase) { struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase); gate_desc *idt = fixup_pointer(bringup_idt_table, physbase); From e6489cc45fa711e4522c1129e9a43ed44b7aaa96 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Fri, 19 Apr 2024 10:11:26 +0200 Subject: [PATCH 322/553] x86/head/64: Move the __head definition to [ Commit d2a285d65bfde3218fd0c3b88794d0135ced680b upstream ] Move the __head section definition to a header to widen its use. An upcoming patch will mark the code as __head in mem_encrypt_identity.c too. Signed-off-by: Hou Wenlong Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/0583f57977be184689c373fe540cbd7d85ca2047.1697525407.git.houwenlong.hwl@antgroup.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/init.h | 2 ++ arch/x86/kernel/head64.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5f1d3c421f68..cc9ccf61b6bd 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H +#define __head __section(".head.text") + struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void *context; /* context for alloc_pgt_page */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 78f3f6756538..4fae511b2e2b 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -41,6 +41,7 @@ #include #include #include +#include /* * Manage page tables very early on. @@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = { .address = 0, }; -#define __head __section(".head.text") - static void __head *fixup_pointer(void *ptr, unsigned long physaddr) { return ptr - (void *)_text + (void *)physaddr; From d6f5bc5ff04ff2e282ec5be05a0cde2091e4e222 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:27 +0200 Subject: [PATCH 323/553] x86/sme: Move early SME kernel encryption handling into .head.text [ Commit 48204aba801f1b512b3abed10b8e1a63e03f3dd1 upstream ] The .head.text section is the initial primary entrypoint of the core kernel, and is entered with the CPU executing from a 1:1 mapping of memory. Such code must never access global variables using absolute references, as these are based on the kernel virtual mapping which is not active yet at this point. Given that the SME startup code is also called from this early execution context, move it into .head.text as well. This will allow more thorough build time checks in the future to ensure that early startup code only uses RIP-relative references to global variables. Also replace some occurrences of __pa_symbol() [which relies on the compiler generating an absolute reference, which is not guaranteed] and an open coded RIP-relative access with RIP_REL_REF(). Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Lendacky Link: https://lore.kernel.org/r/20240227151907.387873-18-ardb+git@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mem_encrypt.h | 8 +++--- arch/x86/mm/mem_encrypt_identity.c | 42 ++++++++++++------------------ 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 41d06822bc8c..853f423b1d13 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); void __init sev_setup_arch(void); -void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); @@ -80,8 +80,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } static inline void __init sev_setup_arch(void) { } -static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { } static inline void sev_es_init_vc_handling(void) { } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index f7dfc3f89921..f17609884874 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -98,7 +99,7 @@ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; -static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; @@ -113,7 +114,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) memset(pgd_p, 0, pgd_size); } -static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd; p4d_t *p4d; @@ -150,7 +151,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) return pud; } -static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -166,7 +167,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); } -static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -192,7 +193,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); } -static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); @@ -202,7 +203,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd(ppd); @@ -212,7 +213,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) } } -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, +static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, pmdval_t pmd_flags, pteval_t pte_flags) { unsigned long vaddr_end; @@ -236,22 +237,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, __sme_map_range_pte(ppd); } -static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } -static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); } -static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } -static unsigned long __init sme_pgtable_calc(unsigned long len) +static unsigned long __head sme_pgtable_calc(unsigned long len) { unsigned long entries = 0, tables = 0; @@ -288,7 +289,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return entries + tables; } -void __init sme_encrypt_kernel(struct boot_params *bp) +void __head sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; @@ -323,9 +324,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * memory from being cached. */ - /* Physical addresses gives us the identity mapped virtual addresses */ - kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); + kernel_start = (unsigned long)RIP_REL_REF(_text); + kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -342,14 +342,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp) } #endif - /* - * We're running identity mapped, so we must obtain the address to the - * SME encryption workarea using rip-relative addressing. - */ - asm ("lea sme_workarea(%%rip), %0" - : "=r" (workarea_start) - : "p" (sme_workarea)); - /* * Calculate required number of workarea bytes needed: * executable encryption area size: @@ -359,7 +351,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ - execute_start = workarea_start; + execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; @@ -502,7 +494,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) native_write_cr3(__native_read_cr3()); } -void __init sme_enable(struct boot_params *bp) +void __head sme_enable(struct boot_params *bp) { const char *cmdline_ptr, *cmdline_arg, *cmdline_on; unsigned int eax, ebx, ecx, edx; From f56faf87c1e9008e5d5a58c96b89db20934c5473 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:28 +0200 Subject: [PATCH 324/553] x86/sev: Move early startup code into .head.text section [ Commit 428080c9b19bfda37c478cd626dbd3851db1aff9 upstream ] In preparation for implementing rigorous build time checks to enforce that only code that can support it will be called from the early 1:1 mapping of memory, move SEV init code that is called in this manner to the .head.text section. Signed-off-by: Ard Biesheuvel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Lendacky Link: https://lore.kernel.org/r/20240227151907.387873-19-ardb+git@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/sev.c | 3 +++ arch/x86/include/asm/sev.h | 10 +++++----- arch/x86/kernel/sev-shared.c | 23 ++++++++++------------- arch/x86/kernel/sev.c | 11 ++++++----- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index d07e665bb265..3c5d5c97f8f7 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -118,6 +118,9 @@ static bool fault_in_kernel_space(unsigned long address) #define __init #define __pa(x) ((unsigned long)(x)) +#undef __head +#define __head + #define __BOOT_COMPRESSED /* Basic instruction decoding support needed */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index c57dd21155bd..bcac2e53d50b 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -192,15 +192,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) struct snp_guest_request_ioctl; void setup_ghcb(void); -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned long npages); -void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned long npages); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void __init __noreturn snp_abort(void); +void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); u64 snp_get_unsupported_features(u64 status); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 271e70d5748e..3fe76bf17d95 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -86,7 +86,8 @@ static bool __init sev_es_check_cpu_features(void) return true; } -static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) +static void __head __noreturn +sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -323,13 +324,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid */ static const struct snp_cpuid_table *snp_cpuid_get_table(void) { - void *ptr; - - asm ("lea cpuid_table_copy(%%rip), %0" - : "=r" (ptr) - : "p" (&cpuid_table_copy)); - - return ptr; + return &RIP_REL_REF(cpuid_table_copy); } /* @@ -388,7 +383,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; } -static bool +static bool __head snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -525,7 +520,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static int __head +snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -567,7 +563,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -1013,7 +1009,8 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +static __head +struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; struct setup_data *hdr; @@ -1040,7 +1037,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index e35fcc8d4bae..f8a8249ae117 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -690,7 +691,7 @@ static void pvalidate_pages(unsigned long vaddr, unsigned long npages, bool vali } } -static void __init early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op) +static void __head early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; @@ -728,7 +729,7 @@ e_term: sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); } -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { /* @@ -2085,7 +2086,7 @@ fail: * * Scan for the blob in that order. */ -static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2111,7 +2112,7 @@ found_cc_info: return cc_info; } -bool __init snp_init(struct boot_params *bp) +bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2133,7 +2134,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init __noreturn snp_abort(void) +void __head __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } From 408a43b6c948421fea5e8527623f912f19179e59 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Apr 2024 10:11:29 +0200 Subject: [PATCH 325/553] x86/efistub: Remap kernel text read-only before dropping NX attribute [ Commit 9c55461040a9264b7e44444c53d26480b438eda6 upstream ] Currently, the EFI stub invokes the EFI memory attributes protocol to strip any NX restrictions from the entire loaded kernel, resulting in all code and data being mapped read-write-execute. The point of the EFI memory attributes protocol is to remove the need for all memory allocations to be mapped with both write and execute permissions by default, and make it the OS loader's responsibility to transition data mappings to code mappings where appropriate. Even though the UEFI specification does not appear to leave room for denying memory attribute changes based on security policy, let's be cautious and avoid relying on the ability to create read-write-execute mappings. This is trivially achievable, given that the amount of kernel code executing via the firmware's 1:1 mapping is rather small and limited to the .head.text region. So let's drop the NX restrictions only on that subregion, but not before remapping it as read-only first. Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 1 + arch/x86/include/asm/boot.h | 1 + drivers/firmware/efi/libstub/x86-stub.c | 11 ++++++++++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3965b2c9efee..6e61baff223f 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 8ae7893d712f..45435ff88363 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,7 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_text_size = VO___start_rodata - VO__text; const unsigned long kernel_total_size = VO__end - VO__text; static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index a38cc0afc90a..a3e0be0470a4 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -81,6 +81,7 @@ #ifndef __ASSEMBLY__ extern unsigned int output_len; +extern const unsigned long kernel_text_size; extern const unsigned long kernel_total_size; unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 1f5edcb6339a..55468debd55d 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -227,6 +227,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start, rounded_end = roundup(start + size, EFI_PAGE_SIZE); if (memattr != NULL) { + status = efi_call_proto(memattr, set_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_RO); + if (status != EFI_SUCCESS) { + efi_warn("Failed to set EFI_MEMORY_RO attribute\n"); + return status; + } + status = efi_call_proto(memattr, clear_memory_attributes, rounded_start, rounded_end - rounded_start, @@ -778,7 +787,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) *kernel_entry = addr + entry; - return efi_adjust_memory_range_protection(addr, kernel_total_size); + return efi_adjust_memory_range_protection(addr, kernel_text_size); } static void __noreturn enter_kernel(unsigned long kernel_addr, From 8d56bad42ac4c43c6c72ddd6a654a2628bf839c5 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 7 Apr 2024 14:56:04 +0800 Subject: [PATCH 326/553] netfilter: nf_tables: Fix potential data-race in __nft_expr_type_get() [ Upstream commit f969eb84ce482331a991079ab7a5c4dc3b7f89bf ] nft_unregister_expr() can concurrent with __nft_expr_type_get(), and there is not any protection when iterate over nf_tables_expressions list in __nft_expr_type_get(). Therefore, there is potential data-race of nf_tables_expressions list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_expressions list in __nft_expr_type_get(), and use rcu_read_lock() in the caller nft_expr_type_get() to protect the entire type query process. Fixes: ef1f7df9170d ("netfilter: nf_tables: expression ops overloading") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8152a69d8268..ba63866914f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2891,7 +2891,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -2923,9 +2923,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES From df7c0fb8c2b9f9cac65659332581b19682a71349 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 7 Apr 2024 14:56:05 +0800 Subject: [PATCH 327/553] netfilter: nf_tables: Fix potential data-race in __nft_obj_type_get() [ Upstream commit d78d867dcea69c328db30df665be5be7d0148484 ] nft_unregister_obj() can concurrent with __nft_obj_type_get(), and there is not any protection when iterate over nf_tables_objects list in __nft_obj_type_get(). Therefore, there is potential data-race of nf_tables_objects list entry. Use list_for_each_entry_rcu() to iterate over nf_tables_objects list in __nft_obj_type_get(), and use rcu_read_lock() in the caller nft_obj_type_get() to protect the entire type query process. Fixes: e50092404c1b ("netfilter: nf_tables: add stateful objects") Signed-off-by: Ziyang Xuan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ba63866914f1..1c4b7a8ec2cc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7175,7 +7175,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; - list_for_each_entry(type, &nf_tables_objects, list) { + list_for_each_entry_rcu(type, &nf_tables_objects, list) { if (type->family != NFPROTO_UNSPEC && type->family != family) continue; @@ -7191,9 +7191,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; + rcu_read_lock(); type = __nft_obj_type_get(objtype, family); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES From b13db0d16bc7b2a52abcf5cb71334f63faa5dbd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Apr 2024 11:24:59 +0200 Subject: [PATCH 328/553] netfilter: br_netfilter: skip conntrack input hook for promisc packets [ Upstream commit 751de2012eafa4d46d8081056761fa0e9cc8a178 ] For historical reasons, when bridge device is in promisc mode, packets that are directed to the taps follow bridge input hook path. This patch adds a workaround to reset conntrack for these packets. Jianbo Liu reports warning splats in their test infrastructure where cloned packets reach the br_netfilter input hook to confirm the conntrack object. Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has reached the input hook because it is passed up to the bridge device to reach the taps. [ 57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core [ 57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19 [ 57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1 [ 57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202 [ 57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000 [ 57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000 [ 57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003 [ 57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000 [ 57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800 [ 57.582313] FS: 0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000 [ 57.583040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 57.583564] CR2: 000000c4206aa000 CR3: 0000000103847001 CR4: 0000000000370eb0 [ 57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 57.585440] Call Trace: [ 57.585721] [ 57.585976] ? __warn+0x7d/0x130 [ 57.586323] ? br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.586811] ? report_bug+0xf1/0x1c0 [ 57.587177] ? handle_bug+0x3f/0x70 [ 57.587539] ? exc_invalid_op+0x13/0x60 [ 57.587929] ? asm_exc_invalid_op+0x16/0x20 [ 57.588336] ? br_nf_local_in+0x157/0x180 [br_netfilter] [ 57.588825] nf_hook_slow+0x3d/0xd0 [ 57.589188] ? br_handle_vlan+0x4b/0x110 [ 57.589579] br_pass_frame_up+0xfc/0x150 [ 57.589970] ? br_port_flags_change+0x40/0x40 [ 57.590396] br_handle_frame_finish+0x346/0x5e0 [ 57.590837] ? ipt_do_table+0x32e/0x430 [ 57.591221] ? br_handle_local_finish+0x20/0x20 [ 57.591656] br_nf_hook_thresh+0x4b/0xf0 [br_netfilter] [ 57.592286] ? br_handle_local_finish+0x20/0x20 [ 57.592802] br_nf_pre_routing_finish+0x178/0x480 [br_netfilter] [ 57.593348] ? br_handle_local_finish+0x20/0x20 [ 57.593782] ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat] [ 57.594279] br_nf_pre_routing+0x24c/0x550 [br_netfilter] [ 57.594780] ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter] [ 57.595280] br_handle_frame+0x1f3/0x3d0 [ 57.595676] ? br_handle_local_finish+0x20/0x20 [ 57.596118] ? br_handle_frame_finish+0x5e0/0x5e0 [ 57.596566] __netif_receive_skb_core+0x25b/0xfc0 [ 57.597017] ? __napi_build_skb+0x37/0x40 [ 57.597418] __netif_receive_skb_list_core+0xfb/0x220 Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") Reported-by: Jianbo Liu Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/br_input.c | 15 +++++++++++---- net/bridge/br_netfilter_hooks.c | 6 ++++++ net/bridge/br_private.h | 1 + net/bridge/netfilter/nf_conntrack_bridge.c | 14 ++++++++++---- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6bb272894c96..b94a1783902e 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) return netif_receive_skb(skb); } -static int br_pass_frame_up(struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb, bool promisc) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); @@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), BR_MCAST_DIR_TX); + BR_INPUT_SKB_CB(skb)->promisc = promisc; + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, br_netif_receive_skb); @@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb struct net_bridge_mcast *brmctx; struct net_bridge_vlan *vlan; struct net_bridge *br; + bool promisc; u16 vid = 0; u8 state; @@ -120,7 +123,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); - local_rcv = !!(br->dev->flags & IFF_PROMISC); + promisc = !!(br->dev->flags & IFF_PROMISC); + local_rcv = promisc; + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { /* by definition the broadcast is also a multicast address */ if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { @@ -183,7 +188,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb unsigned long now = jiffies; if (test_bit(BR_FDB_LOCAL, &dst->flags)) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, false); if (now != dst->used) dst->used = now; @@ -196,7 +201,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } if (local_rcv) - return br_pass_frame_up(skb); + return br_pass_frame_up(skb, promisc); out: return 0; @@ -368,6 +373,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } + BR_INPUT_SKB_CB(skb)->promisc = false; + /* The else clause should be hit when nf_hook(): * - returns < 0 (drop/error) * - returns = 0 (stolen/nf_queue) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index bff48d576363..9ac70c27da83 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; struct nf_conntrack *nfct = skb_nfct(skb); const struct nf_ct_hook *ct_hook; struct nf_conn *ct; int ret; + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 51d010f64e06..940de9516768 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -559,6 +559,7 @@ struct br_input_skb_cb { #endif u8 proxyarp_replied:1; u8 src_port_isolated:1; + u8 promisc:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index c7c27ada6704..e60c38670f22 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - enum ip_conntrack_info ctinfo; + bool promisc = BR_INPUT_SKB_CB(skb)->promisc; + struct nf_conntrack *nfct = skb_nfct(skb); struct nf_conn *ct; - if (skb->pkt_type == PACKET_HOST) + if (promisc) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + + if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; /* nf_conntrack_confirm() cannot handle concurrent clones, * this happens for broad/multicast frames with e.g. macvlan on top * of the bridge device. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + ct = container_of(nfct, struct nf_conn, ct_general); + if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) return NF_ACCEPT; /* let inet prerouting call conntrack again */ From 41d8fdf3afaff312e17466e4ab732937738d5644 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Apr 2024 21:05:13 +0200 Subject: [PATCH 329/553] netfilter: nft_set_pipapo: do not free live element [ Upstream commit 3cfc9ec039af60dbd8965ae085b2c2ccdcfbe1cc ] Pablo reports a crash with large batches of elements with a back-to-back add/remove pattern. Quoting Pablo: add_elem("00000000") timeout 100 ms ... add_elem("0000000X") timeout 100 ms del_elem("0000000X") <---------------- delete one that was just added ... add_elem("00005000") timeout 100 ms 1) nft_pipapo_remove() removes element 0000000X Then, KASAN shows a splat. Looking at the remove function there is a chance that we will drop a rule that maps to a non-deactivated element. Removal happens in two steps, first we do a lookup for key k and return the to-be-removed element and mark it as inactive in the next generation. Then, in a second step, the element gets removed from the set/map. The _remove function does not work correctly if we have more than one element that share the same key. This can happen if we insert an element into a set when the set already holds an element with same key, but the element mapping to the existing key has timed out or is not active in the next generation. In such case its possible that removal will unmap the wrong element. If this happens, we will leak the non-deactivated element, it becomes unreachable. The element that got deactivated (and will be freed later) will remain reachable in the set data structure, this can result in a crash when such an element is retrieved during lookup (stale pointer). Add a check that the fully matching key does in fact map to the element that we have marked as inactive in the deactivation step. If not, we need to continue searching. Add a bug/warn trap at the end of the function as well, the remove function must not ever be called with an invisible/unreachable/non-existent element. v2: avoid uneeded temporary variable (Stefano) Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Pablo Neira Ayuso Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 58eca2616273..2299ced939c4 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1994,6 +1994,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = rules_f0; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; + if (!pipapo_match_field(f, start, rules_fx, match_start, match_end)) break; @@ -2006,16 +2008,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); - } - if (i == m->field_count) { - priv->dirty = true; - pipapo_drop(m, rulemap); - return; + if (last && f->mt[rulemap[i].to].e == e) { + priv->dirty = true; + pipapo_drop(m, rulemap); + return; + } } first_rule += rules_f0; } + + WARN_ON_ONCE(1); /* elem_priv not found */ } /** From 8bf7c76a2a207ca2b4cfda0a279192adf27678d7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Apr 2024 13:47:33 +0200 Subject: [PATCH 330/553] netfilter: flowtable: validate pppoe header [ Upstream commit 87b3593bed1868b2d9fe096c01bcdf0ea86cbebf ] Ensure there is sufficient room to access the protocol field of the PPPoe header. Validate it once before the flowtable lookup, then use a helper function to access protocol field. Reported-by: syzbot+b6f07e1c07ef40199081@syzkaller.appspotmail.com Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 12 +++++++++++- net/netfilter/nf_flow_table_inet.c | 3 ++- net/netfilter/nf_flow_table_ip.c | 8 +++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 4a767b3d20b9..df7775afb92b 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -335,7 +335,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) { __be16 proto; @@ -351,6 +351,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) return 0; } +static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) +{ + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + return false; + + *inner_proto = __nf_flow_pppoe_proto(skb); + + return true; +} + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 9505f9d188ff..6eef15648b7b 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, proto = veth->h_vlan_encapsulated_proto; break; case htons(ETH_P_PPP_SES): - proto = nf_flow_pppoe_proto(skb); + if (!nf_flow_pppoe_proto(skb, &proto)) + return NF_ACCEPT; break; default: proto = skb->protocol; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 6feaac9ab05c..306e1ba6012e 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -267,10 +267,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, +static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, u32 *offset) { struct vlan_ethhdr *veth; + __be16 inner_proto; switch (skb->protocol) { case htons(ETH_P_8021Q): @@ -281,7 +282,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, } break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_proto(skb) == proto) { + if (nf_flow_pppoe_proto(skb, &inner_proto) && + inner_proto == proto) { *offset += PPPOE_SES_HLEN; return true; } @@ -310,7 +312,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb, skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): - skb->protocol = nf_flow_pppoe_proto(skb); + skb->protocol = __nf_flow_pppoe_proto(skb); skb_pull(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; From f1c3c61701a0b12f4906152c1626a5de580ea3d2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 11 Apr 2024 00:09:00 +0200 Subject: [PATCH 331/553] netfilter: flowtable: incorrect pppoe tuple [ Upstream commit 6db5dc7b351b9569940cd1cf445e237c42cd6d27 ] pppoe traffic reaching ingress path does not match the flowtable entry because the pppoe header is expected to be at the network header offset. This bug causes a mismatch in the flow table lookup, so pppoe packets enter the classical forwarding path. Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 306e1ba6012e..22bc0e3d8a0b 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -156,7 +156,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb, tuple->encap[i].proto = skb->protocol; break; case htons(ETH_P_PPP_SES): - phdr = (struct pppoe_hdr *)skb_mac_header(skb); + phdr = (struct pppoe_hdr *)skb_network_header(skb); tuple->encap[i].id = ntohs(phdr->sid); tuple->encap[i].proto = skb->protocol; break; From a1d3e3521f02cac2a215c19c3294a2ce72b69fd4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 10 Apr 2024 10:10:15 -0700 Subject: [PATCH 332/553] af_unix: Call manage_oob() for every skb in unix_stream_read_generic(). [ Upstream commit 283454c8a123072e5c386a5a2b5fc576aa455b6f ] When we call recv() for AF_UNIX socket, we first peek one skb and calls manage_oob() to check if the skb is sent with MSG_OOB. However, when we fetch the next (and the following) skb, manage_oob() is not called now, leading a wrong behaviour. Let's say a socket send()s "hello" with MSG_OOB and the peer tries to recv() 5 bytes with MSG_PEEK. Here, we should get only "hell" without 'o', but actually not: >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'hello', MSG_OOB) 5 >>> c2.recv(5, MSG_PEEK) b'hello' The first skb fills 4 bytes, and the next skb is peeked but not properly checked by manage_oob(). Let's move up the again label to call manage_oob() for evry skb. With this patch: >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'hello', MSG_OOB) 5 >>> c2.recv(5, MSG_PEEK) b'hell' Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240410171016.7621-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0a75d76535f7..6af6f82e8946 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2753,6 +2753,7 @@ redo: last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; +again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); @@ -2764,7 +2765,6 @@ redo: } } #endif -again: if (skb == NULL) { if (copied >= target) goto unlock; From ba0db4638525b8b054b3d546b45f7e473f477027 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 10 Apr 2024 10:10:16 -0700 Subject: [PATCH 333/553] af_unix: Don't peek OOB data without MSG_OOB. [ Upstream commit 22dd70eb2c3d754862964377a75abafd3167346b ] Currently, we can read OOB data without MSG_OOB by using MSG_PEEK when OOB data is sitting on the front row, which is apparently wrong. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'a', MSG_OOB) 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'a' If manage_oob() is called when no data has been copied, we only check if the socket enables SO_OOBINLINE or MSG_PEEK is not used. Otherwise, the skb is returned as is. However, here we should return NULL if MSG_PEEK is set and no data has been copied. Also, in such a case, we should not jump to the redo label because we will be caught in the loop and hog the CPU until normal data comes in. Then, we need to handle skb == NULL case with the if-clause below the manage_oob() block. With this patch: >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'a', MSG_OOB) 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) Traceback (most recent call last): File "", line 1, in BlockingIOError: [Errno 11] Resource temporarily unavailable Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240410171016.7621-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6af6f82e8946..f28e2956fea5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2675,7 +2675,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } - } else if (!(flags & MSG_PEEK)) { + } else if (flags & MSG_PEEK) { + skb = NULL; + } else { skb_unlink(skb, &sk->sk_receive_queue); WRITE_ONCE(u->oob_skb, NULL); if (!WARN_ON_ONCE(skb_unref(skb))) @@ -2757,11 +2759,9 @@ again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); - if (!skb) { + if (!skb && copied) { unix_state_unlock(sk); - if (copied) - break; - goto redo; + break; } } #endif From 8635ac7dd9cf033df69d52ecc4d2cf10aea7c98b Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 11 Apr 2024 14:54:39 +0300 Subject: [PATCH 334/553] net/mlx5: Lag, restore buckets number to default after hash LAG deactivation [ Upstream commit 37cc10da3a50e6d0cb9808a90b7da9b4868794dd ] The cited patch introduces the concept of buckets in LAG in hash mode. However, the patch doesn't clear the number of buckets in the LAG deactivation. This results in using the wrong number of buckets in case user create a hash mode LAG and afterwards create a non-hash mode LAG. Hence, restore buckets number to default after hash mode LAG deactivation. Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode") Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240411115444.374475-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index ad32b80e8501..01c0e1ee918d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -679,8 +679,10 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) return err; } - if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { mlx5_lag_port_sel_destroy(ldev); + ldev->buckets = 1; + } if (mlx5_lag_has_drop_rule(ldev)) mlx5_lag_drop_rule_cleanup(ldev); From 46efa4d5930cf3c2af8c01f75e0a47e4fc045e3b Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Apr 2024 14:54:44 +0300 Subject: [PATCH 335/553] net/mlx5e: Prevent deadlock while disabling aRFS [ Upstream commit fef965764cf562f28afb997b626fc7c3cec99693 ] When disabling aRFS under the `priv->state_lock`, any scheduled aRFS works are canceled using the `cancel_work_sync` function, which waits for the work to end if it has already started. However, while waiting for the work handler, the handler will try to acquire the `state_lock` which is already acquired. The worker acquires the lock to delete the rules if the state is down, which is not the worker's responsibility since disabling aRFS deletes the rules. Add an aRFS state variable, which indicates whether the aRFS is enabled and prevent adding rules when the aRFS is disabled. Kernel log: ====================================================== WARNING: possible circular locking dependency detected 6.7.0-rc4_net_next_mlx5_5483eb2 #1 Tainted: G I ------------------------------------------------------ ethtool/386089 is trying to acquire lock: ffff88810f21ce68 ((work_completion)(&rule->arfs_work)){+.+.}-{0:0}, at: __flush_work+0x74/0x4e0 but task is already holding lock: ffff8884a1808cc0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_ethtool_set_channels+0x53/0x200 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&priv->state_lock){+.+.}-{3:3}: __mutex_lock+0x80/0xc90 arfs_handle_work+0x4b/0x3b0 [mlx5_core] process_one_work+0x1dc/0x4a0 worker_thread+0x1bf/0x3c0 kthread+0xd7/0x100 ret_from_fork+0x2d/0x50 ret_from_fork_asm+0x11/0x20 -> #0 ((work_completion)(&rule->arfs_work)){+.+.}-{0:0}: __lock_acquire+0x17b4/0x2c80 lock_acquire+0xd0/0x2b0 __flush_work+0x7a/0x4e0 __cancel_work_timer+0x131/0x1c0 arfs_del_rules+0x143/0x1e0 [mlx5_core] mlx5e_arfs_disable+0x1b/0x30 [mlx5_core] mlx5e_ethtool_set_channels+0xcb/0x200 [mlx5_core] ethnl_set_channels+0x28f/0x3b0 ethnl_default_set_doit+0xec/0x240 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x188/0x2c0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1a1/0x270 netlink_sendmsg+0x214/0x460 __sock_sendmsg+0x38/0x60 __sys_sendto+0x113/0x170 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x46/0x4e other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&priv->state_lock); lock((work_completion)(&rule->arfs_work)); lock(&priv->state_lock); lock((work_completion)(&rule->arfs_work)); *** DEADLOCK *** 3 locks held by ethtool/386089: #0: ffffffff82ea7210 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 #1: ffffffff82e94c88 (rtnl_mutex){+.+.}-{3:3}, at: ethnl_default_set_doit+0xd3/0x240 #2: ffff8884a1808cc0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_ethtool_set_channels+0x53/0x200 [mlx5_core] stack backtrace: CPU: 15 PID: 386089 Comm: ethtool Tainted: G I 6.7.0-rc4_net_next_mlx5_5483eb2 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x60/0xa0 check_noncircular+0x144/0x160 __lock_acquire+0x17b4/0x2c80 lock_acquire+0xd0/0x2b0 ? __flush_work+0x74/0x4e0 ? save_trace+0x3e/0x360 ? __flush_work+0x74/0x4e0 __flush_work+0x7a/0x4e0 ? __flush_work+0x74/0x4e0 ? __lock_acquire+0xa78/0x2c80 ? lock_acquire+0xd0/0x2b0 ? mark_held_locks+0x49/0x70 __cancel_work_timer+0x131/0x1c0 ? mark_held_locks+0x49/0x70 arfs_del_rules+0x143/0x1e0 [mlx5_core] mlx5e_arfs_disable+0x1b/0x30 [mlx5_core] mlx5e_ethtool_set_channels+0xcb/0x200 [mlx5_core] ethnl_set_channels+0x28f/0x3b0 ethnl_default_set_doit+0xec/0x240 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x188/0x2c0 ? ethnl_ops_begin+0xb0/0xb0 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1a1/0x270 netlink_sendmsg+0x214/0x460 __sock_sendmsg+0x38/0x60 __sys_sendto+0x113/0x170 ? do_user_addr_fault+0x53f/0x8f0 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x46/0x4e Fixes: 45bf454ae884 ("net/mlx5e: Enabling aRFS mechanism") Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240411115444.374475-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 58eacba6de8c..ad51edf55318 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -45,6 +45,10 @@ struct arfs_table { struct hlist_head rules_hash[ARFS_HASH_SIZE]; }; +enum { + MLX5E_ARFS_STATE_ENABLED, +}; + enum arfs_type { ARFS_IPV4_TCP, ARFS_IPV6_TCP, @@ -60,6 +64,7 @@ struct mlx5e_arfs_tables { struct list_head rules; int last_filter_id; struct workqueue_struct *wq; + unsigned long state; }; struct arfs_tuple { @@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) return err; } } + set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + return 0; } @@ -454,6 +461,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs) int i; int j; + clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + spin_lock_bh(&arfs->arfs_lock); mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); @@ -621,17 +630,8 @@ static void arfs_handle_work(struct work_struct *work) struct mlx5_flow_handle *rule; arfs = mlx5e_fs_get_arfs(priv->fs); - mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - spin_lock_bh(&arfs->arfs_lock); - hlist_del(&arfs_rule->hlist); - spin_unlock_bh(&arfs->arfs_lock); - - mutex_unlock(&priv->state_lock); - kfree(arfs_rule); - goto out; - } - mutex_unlock(&priv->state_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) + return; if (!arfs_rule->rule) { rule = arfs_add_rule(priv, arfs_rule); @@ -744,6 +744,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) { + spin_unlock_bh(&arfs->arfs_lock); + return -EPERM; + } + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index) { From 6a6ebec40820230a5806cafa6ad500cd543ca29e Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 15 Mar 2024 12:08:21 +0100 Subject: [PATCH 336/553] ice: tc: allow zero flags in parsing tc flower [ Upstream commit 73278715725a8347032acf233082ca4eb31e6a56 ] The check for flags is done to not pass empty lookups to adding switch rule functions. Since metadata is always added to lookups there is no need to check against the flag. It is also fixing the problem with such rule: $ tc filter add dev gtp_dev ingress protocol ip prio 0 flower \ enc_dst_port 2123 action drop Switch block in case of GTP can't parse the destination port, because it should always be set to GTP specific value. The same with ethertype. The result is that there is no other matching criteria than GTP tunnel. In this case flags is 0, rule can't be added only because of defensive check against flags. Fixes: 9a225f81f540 ("ice: Support GTP-U and GTP-C offload in switchdev") Reviewed-by: Wojciech Drewek Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 652ef09eeb30..ec6628aacc13 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -663,7 +663,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) int ret; int i; - if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) { + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); return -EOPNOTSUPP; } From 62e27ef18eb4f0d33bbae8e9ef56b99696a74713 Mon Sep 17 00:00:00 2001 From: Lei Chen Date: Sun, 14 Apr 2024 22:02:46 -0400 Subject: [PATCH 337/553] tun: limit printing rate when illegal packet received by tun dev [ Upstream commit f8bbc07ac535593139c875ffa19af924b1084540 ] vhost_worker will call tun call backs to receive packets. If too many illegal packets arrives, tun_do_read will keep dumping packet contents. When console is enabled, it will costs much more cpu time to dump packet and soft lockup will be detected. net_ratelimit mechanism can be used to limit the dumping rate. PID: 33036 TASK: ffff949da6f20000 CPU: 23 COMMAND: "vhost-32980" #0 [fffffe00003fce50] crash_nmi_callback at ffffffff89249253 #1 [fffffe00003fce58] nmi_handle at ffffffff89225fa3 #2 [fffffe00003fceb0] default_do_nmi at ffffffff8922642e #3 [fffffe00003fced0] do_nmi at ffffffff8922660d #4 [fffffe00003fcef0] end_repeat_nmi at ffffffff89c01663 [exception RIP: io_serial_in+20] RIP: ffffffff89792594 RSP: ffffa655314979e8 RFLAGS: 00000002 RAX: ffffffff89792500 RBX: ffffffff8af428a0 RCX: 0000000000000000 RDX: 00000000000003fd RSI: 0000000000000005 RDI: ffffffff8af428a0 RBP: 0000000000002710 R8: 0000000000000004 R9: 000000000000000f R10: 0000000000000000 R11: ffffffff8acbf64f R12: 0000000000000020 R13: ffffffff8acbf698 R14: 0000000000000058 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #5 [ffffa655314979e8] io_serial_in at ffffffff89792594 #6 [ffffa655314979e8] wait_for_xmitr at ffffffff89793470 #7 [ffffa65531497a08] serial8250_console_putchar at ffffffff897934f6 #8 [ffffa65531497a20] uart_console_write at ffffffff8978b605 #9 [ffffa65531497a48] serial8250_console_write at ffffffff89796558 #10 [ffffa65531497ac8] console_unlock at ffffffff89316124 #11 [ffffa65531497b10] vprintk_emit at ffffffff89317c07 #12 [ffffa65531497b68] printk at ffffffff89318306 #13 [ffffa65531497bc8] print_hex_dump at ffffffff89650765 #14 [ffffa65531497ca8] tun_do_read at ffffffffc0b06c27 [tun] #15 [ffffa65531497d38] tun_recvmsg at ffffffffc0b06e34 [tun] #16 [ffffa65531497d68] handle_rx at ffffffffc0c5d682 [vhost_net] #17 [ffffa65531497ed0] vhost_worker at ffffffffc0c644dc [vhost] #18 [ffffa65531497f10] kthread at ffffffff892d2e72 #19 [ffffa65531497f50] ret_from_fork at ffffffff89c0022f Fixes: ef3db4a59542 ("tun: avoid BUG, dump packet on GSO errors") Signed-off-by: Lei Chen Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Reviewed-by: Eric Dumazet Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20240415020247.2207781-1-lei.chen@smartx.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/tun.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 922d6f16d99d..4af1ba5d074c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2121,14 +2121,16 @@ static ssize_t tun_put_user(struct tun_struct *tun, tun_is_little_endian(tun), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb); - pr_err("unexpected GSO type: " - "0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + + if (net_ratelimit()) { + netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), + tun16_to_cpu(tun, gso.hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + } WARN_ON_ONCE(1); return -EINVAL; } From e86c9db58eba290e858e2bb80efcde9e3973a5ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 13 Apr 2024 16:01:39 +0300 Subject: [PATCH 338/553] net: dsa: mt7530: fix mirroring frames received on local port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d59cf049c8378677053703e724808836f180888e ] This switch intellectual property provides a bit on the ARL global control register which controls allowing mirroring frames which are received on the local port (monitor port). This bit is unset after reset. This ability must be enabled to fully support the port mirroring feature on this switch intellectual property. Therefore, this patch fixes the traffic not being reflected on a port, which would be configured like below: tc qdisc add dev swp0 clsact tc filter add dev swp0 ingress matchall skip_sw \ action mirred egress mirror dev swp0 As a side note, this configuration provides the hairpinning feature for a single port. Fixes: 37feab6076aa ("net: dsa: mt7530: add support for port mirroring") Signed-off-by: Arınç ÜNAL Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 6 ++++++ drivers/net/dsa/mt7530.h | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d4515c19a5f3..b5f61a9a378e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2461,6 +2461,9 @@ mt7530_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Setup VLAN ID 0 for VLAN-unaware bridges */ ret = mt7530_setup_vlan0(priv); if (ret) @@ -2577,6 +2580,9 @@ mt7531_setup_common(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + /* Allow mirroring frames received on the local port (monitor port). */ + mt7530_set(priv, MT753X_AGC, LOCAL_EN); + /* Flush the FDB table */ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); if (ret < 0) diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 2d1ea390f05a..af18f47f2214 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -31,6 +31,10 @@ enum mt753x_id { #define SYSC_REG_RSTCTRL 0x34 #define RESET_MCM BIT(2) +/* Register for ARL global control */ +#define MT753X_AGC 0xc +#define LOCAL_EN BIT(7) + /* Registers to mac forward control for unknown frames */ #define MT7530_MFC 0x10 #define BC_FFP(x) (((x) & 0xff) << 24) From 45e811bab2d088a43fb5199e4cc2900d64363df7 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 17 Apr 2024 15:24:25 +0530 Subject: [PATCH 339/553] net: ethernet: ti: am65-cpsw-nuss: cleanup DMA Channels before using them [ Upstream commit c24cd679b075b0e953ea167b0aa2b2d59e4eba7f ] The TX and RX DMA Channels used by the driver to exchange data with CPSW are not guaranteed to be in a clean state during driver initialization. The Bootloader could have used the same DMA Channels without cleaning them up in the event of failure. Thus, reset and disable the DMA Channels to ensure that they are in a clean state before using them. Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Reported-by: Schuyler Patton Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20240417095425.2253876-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 76fabeae512d..33df06a2de13 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2549,6 +2549,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { + struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns; + struct am65_cpsw_tx_chn *tx_chan = common->tx_chns; struct device *dev = common->dev; struct devlink_port *dl_port; struct am65_cpsw_port *port; @@ -2567,6 +2569,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + /* The DMA Channels are not guaranteed to be in a clean state. + * Reset and disable them to ensure that they are back to the + * clean state and ready to be used. + */ + for (i = 0; i < common->tx_ch_num; i++) { + k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i], + am65_cpsw_nuss_tx_cleanup); + k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn); + } + + for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) + k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan, + am65_cpsw_nuss_rx_cleanup, !!i); + + k3_udma_glue_disable_rx_chn(rx_chan->rx_chn); + ret = am65_cpsw_nuss_register_devlink(common); if (ret) return ret; From 196617d07dd66d7d53de5ce1ca934e8ed92b5d13 Mon Sep 17 00:00:00 2001 From: "Yanjun.Zhu" Date: Thu, 14 Mar 2024 07:51:40 +0100 Subject: [PATCH 340/553] RDMA/rxe: Fix the problem "mutex_destroy missing" [ Upstream commit 481047d7e8391d3842ae59025806531cdad710d9 ] When a mutex lock is not used any more, the function mutex_destroy should be called to mark the mutex lock uninitialized. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yanjun.Zhu Link: https://lore.kernel.org/r/20240314065140.27468-1-yanjun.zhu@linux.dev Reviewed-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 51daac5c4feb..be3ddfbf3cae 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -33,6 +33,8 @@ void rxe_dealloc(struct ib_device *ib_dev) if (rxe->tfm) crypto_free_shash(rxe->tfm); + + mutex_destroy(&rxe->usdev_lock); } /* initialize rxe device parameters */ From ea42dbe759921c02f70fb3d0e1e6c34e1779319a Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 22 Mar 2024 13:20:49 +0200 Subject: [PATCH 341/553] RDMA/cm: Print the old state when cm_destroy_id gets timeout [ Upstream commit b68e1acb5834ed1a2ad42d9d002815a8bae7c0b6 ] The old state is helpful for debugging, as the current state is always IB_CM_IDLE when timeout happens. Fixes: 96d9cbe2f2ff ("RDMA/cm: add timeout to cm_destroy_id wait") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/20240322112049.2022994-1-markzhang@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 462a10d6a576..950fe205995b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1026,23 +1026,26 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) } } -static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id) +static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id, + enum ib_cm_state old_state) { struct cm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__, - cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount)); + pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__, + cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount)); } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; + enum ib_cm_state old_state; struct cm_work *work; int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irq(&cm_id_priv->lock); + old_state = cm_id->state; retest: switch (cm_id->state) { case IB_CM_LISTEN: @@ -1151,7 +1154,7 @@ retest: msecs_to_jiffies( CM_DESTROY_ID_WAIT_TIMEOUT)); if (!ret) /* timeout happened */ - cm_destroy_id_wait_timeout(cm_id); + cm_destroy_id_wait_timeout(cm_id, old_state); } while (!ret); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) From fe446927f8afc08a5f7d54a5fed84abec480cd1f Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 3 Apr 2024 12:03:46 +0300 Subject: [PATCH 342/553] RDMA/mlx5: Fix port number for counter query in multi-port configuration [ Upstream commit be121ffb384f53e966ee7299ffccc6eeb61bc73d ] Set the correct port when querying PPCNT in multi-port configuration. Distinguish between cases where switchdev mode was enabled to multi-port configuration and don't overwrite the queried port to 1 in multi-port case. Fixes: 74b30b3ad5ce ("RDMA/mlx5: Set local port to one when accessing counters") Signed-off-by: Michael Guralnik Link: https://lore.kernel.org/r/9bfcc8ade958b760a51408c3ad654a01b11f7d76.1712134988.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/mad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9c8a7b206dcf..e61efed320f1 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -188,7 +188,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, mdev = dev->mdev; mdev_port_num = 1; } - if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { + if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 && + !mlx5_core_mp_enabled(mdev)) { /* set local port to one for Function-Per-Port HCA. */ mdev = dev->mdev; mdev_port_num = 1; From beb3ff19a55d78d3c70f9284458c9d155818ae27 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 10 Apr 2024 11:46:18 +0200 Subject: [PATCH 343/553] s390/qdio: handle deferred cc1 [ Upstream commit 607638faf2ff1cede37458111496e7cc6c977f6f ] A deferred condition code 1 response indicates that I/O was not started and should be retried. The current QDIO implementation handles a cc1 response as I/O error, resulting in a failed QDIO setup. This can happen for example when a path verification request arrives at the same time as QDIO setup I/O is started. Fix this by retrying the QDIO setup I/O when a cc1 response is received. Note that since commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") commit 5ef1dc40ffa6 ("s390/cio: fix invalid -EBUSY on ccw_device_start") deferred cc1 responses are much more likely to occur. See the commit message of the latter for more background information. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Reviewed-by: Alexandra Winter Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- drivers/s390/cio/qdio_main.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 9cde55730b65..ebcb53580988 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -722,8 +722,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr, lgr_info_log(); } -static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, - int dstat) +static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, + int dstat, int dcc) { DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq"); @@ -731,15 +731,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat, goto error; if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END)) goto error; + if (dcc == 1) + return -EAGAIN; if (!(dstat & DEV_STAT_DEV_END)) goto error; qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED); - return; + return 0; error: DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no); DBF_ERROR("ds: %2x cs:%2x", dstat, cstat); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + return -EIO; } /* qdio interrupt handler */ @@ -748,7 +751,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; - int cstat, dstat; + int cstat, dstat, rc, dcc; if (!intparm || !irq_ptr) { ccw_device_get_schid(cdev, &schid); @@ -768,10 +771,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, qdio_irq_check_sense(irq_ptr, irb); cstat = irb->scsw.cmd.cstat; dstat = irb->scsw.cmd.dstat; + dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0; + rc = 0; switch (irq_ptr->state) { case QDIO_IRQ_STATE_INACTIVE: - qdio_establish_handle_irq(irq_ptr, cstat, dstat); + rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc); break; case QDIO_IRQ_STATE_CLEANUP: qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); @@ -785,12 +790,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, if (cstat || dstat) qdio_handle_activate_check(irq_ptr, intparm, cstat, dstat); + else if (dcc == 1) + rc = -EAGAIN; break; case QDIO_IRQ_STATE_STOPPED: break; default: WARN_ON_ONCE(1); } + + if (rc == -EAGAIN) { + DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry"); + rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0); + if (!rc) + return; + DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no); + DBF_ERROR("rc:%4x", rc); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + } + wake_up(&cdev->private->wait_q); } From 559f3a6333397ab6cd4a696edd65a70b6be62c6e Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 10 Apr 2024 11:46:19 +0200 Subject: [PATCH 344/553] s390/cio: fix race condition during online processing [ Upstream commit 2d8527f2f911fab84aec04df4788c0c23af3df48 ] A race condition exists in ccw_device_set_online() that can cause the online process to fail, leaving the affected device in an inconsistent state. As a result, subsequent attempts to set that device online fail with return code ENODEV. The problem occurs when a path verification request arrives after a wait for final device state completed, but before the result state is evaluated. Fix this by ensuring that the CCW-device lock is held between determining final state and checking result state. Note that since: commit 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") path verification requests are much more likely to occur during boot, resulting in an increased chance of this race condition occurring. Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Reviewed-by: Alexandra Winter Reviewed-by: Vineeth Vijayan Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- drivers/s390/cio/device.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 02813b63f90f..5666b9cc5d29 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -360,10 +360,8 @@ int ccw_device_set_online(struct ccw_device *cdev) spin_lock_irq(cdev->ccwlock); ret = ccw_device_online(cdev); - spin_unlock_irq(cdev->ccwlock); - if (ret == 0) - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); - else { + if (ret) { + spin_unlock_irq(cdev->ccwlock); CIO_MSG_EVENT(0, "ccw_device_online returned %d, " "device 0.%x.%04x\n", ret, cdev->private->dev_id.ssid, @@ -372,7 +370,12 @@ int ccw_device_set_online(struct ccw_device *cdev) put_device(&cdev->dev); return ret; } - spin_lock_irq(cdev->ccwlock); + /* Wait until a final state is reached */ + while (!dev_fsm_final_state(cdev)) { + spin_unlock_irq(cdev->ccwlock); + wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); + spin_lock_irq(cdev->ccwlock); + } /* Check if online processing was successful */ if ((cdev->private->state != DEV_STATE_ONLINE) && (cdev->private->state != DEV_STATE_W4SENSE)) { From 5fd4b090304e450aa0e7cc9cc2b4873285c6face Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 11 Apr 2024 14:08:52 +0300 Subject: [PATCH 345/553] drm: nv04: Fix out of bounds access [ Upstream commit cf92bb778eda7830e79452c6917efa8474a30c1e ] When Output Resource (dcb->or) value is assigned in fabricate_dcb_output(), there may be out of bounds access to dac_users array in case dcb->or is zero because ffs(dcb->or) is used as index there. The 'or' argument of fabricate_dcb_output() must be interpreted as a number of bit to set, not value. Utilize macros from 'enum nouveau_or' in calls instead of hardcoding. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 2e5702aff395 ("drm/nouveau: fabricate DCB encoder table for iMac G4") Fixes: 670820c0e6a9 ("drm/nouveau: Workaround incorrect DCB entry on a GeForce3 Ti 200.") Signed-off-by: Mikhail Kobuk Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240411110854.16701-1-m.kobuk@ispras.ru Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_bios.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 189903b65edc..48cf593383b3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -23,6 +23,7 @@ */ #include "nouveau_drv.h" +#include "nouveau_bios.h" #include "nouveau_reg.h" #include "dispnv04/hw.h" #include "nouveau_encoder.h" @@ -1675,7 +1676,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) */ if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) { if (*conn == 0xf2005014 && *conf == 0xffffffff) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B); return false; } } @@ -1761,26 +1762,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios) #ifdef __powerpc__ /* Apple iMac G4 NV17 */ if (of_machine_is_compatible("PowerMac4,5")) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1); - fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B); + fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C); return; } #endif /* Make up some sane defaults */ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, - bios->legacy.i2c_indices.crt, 1, 1); + bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B); if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0) fabricate_dcb_output(dcb, DCB_OUTPUT_TV, bios->legacy.i2c_indices.tv, - all_heads, 0); + all_heads, DCB_OUTPUT_A); else if (bios->tmds.output0_script_ptr || bios->tmds.output1_script_ptr) fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, bios->legacy.i2c_indices.panel, - all_heads, 1); + all_heads, DCB_OUTPUT_B); } static int From c330a13ab77c67ab09b2332a5e5fe33cded96122 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 4 Apr 2024 13:07:59 +0300 Subject: [PATCH 346/553] drm/panel: visionox-rm69299: don't unregister DSI device [ Upstream commit 9e4d3f4f34455abbaa9930bf6b7575a5cd081496 ] The DSI device for the panel was registered by the DSI host, so it is an error to unregister it from the panel driver. Drop the call to mipi_dsi_device_unregister(). Fixes: c7f66d32dd43 ("drm/panel: add support for rm69299 visionox panel") Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240404-drop-panel-unregister-v1-1-9f56953c5fb9@linaro.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-visionox-rm69299.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index ec228c269146..b380bbb0e0d0 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -261,8 +261,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi) struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); } From 3c55d4396b1577e074d55ba19e9ca69730de72d5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 2 Nov 2022 16:20:02 -0700 Subject: [PATCH 347/553] ARM: omap2: n8x0: stop instantiating codec platform data [ Upstream commit faf3b5cb59f84e4056bd84f115a958bc99c61e65 ] As of 0426370b58b2 ("ARM: dts: omap2420-n810: Correct the audio codec (tlv320aic33) node") the DTS properly specifies reset GPIO, and the device name in auxdata lookup table does not even match the one in device tree anymore, so stop instantiating it. Signed-off-by: Dmitry Torokhov Acked-by: Tony Lindgren Link: https://lore.kernel.org/r/20221102232004.1721864-1-dmitry.torokhov@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/board-n8x0.c | 5 ----- arch/arm/mach-omap2/common-board-devices.h | 2 -- arch/arm/mach-omap2/pdata-quirks.c | 1 - 3 files changed, 8 deletions(-) diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 5e86145db0e2..8897364e550b 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -567,10 +566,6 @@ struct menelaus_platform_data n8x0_menelaus_platform_data = { .late_init = n8x0_menelaus_late_init, }; -struct aic3x_pdata n810_aic33_data = { - .gpio_reset = 118, -}; - static int __init n8x0_late_initcall(void) { if (!board_caps) diff --git a/arch/arm/mach-omap2/common-board-devices.h b/arch/arm/mach-omap2/common-board-devices.h index b23962c38fb2..69694af71475 100644 --- a/arch/arm/mach-omap2/common-board-devices.h +++ b/arch/arm/mach-omap2/common-board-devices.h @@ -2,12 +2,10 @@ #ifndef __OMAP_COMMON_BOARD_DEVICES__ #define __OMAP_COMMON_BOARD_DEVICES__ -#include #include void *n8x0_legacy_init(void); extern struct menelaus_platform_data n8x0_menelaus_platform_data; -extern struct aic3x_pdata n810_aic33_data; #endif /* __OMAP_COMMON_BOARD_DEVICES__ */ diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 5b99d602c87b..9deba798cc91 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -440,7 +440,6 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { #ifdef CONFIG_MACH_NOKIA_N8X0 OF_DEV_AUXDATA("ti,omap2420-mmc", 0x4809c000, "mmci-omap.0", NULL), OF_DEV_AUXDATA("menelaus", 0x72, "1-0072", &n8x0_menelaus_platform_data), - OF_DEV_AUXDATA("tlv320aic3x", 0x18, "2-0018", &n810_aic33_data), #endif #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu", From 57aadcc0288c2db8677ecb784c97b2548a78758b Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 10 Jan 2023 18:56:37 +0200 Subject: [PATCH 348/553] PCI: Avoid FLR for SolidRun SNET DPU rev 1 [ Upstream commit d089d69cc1f824936eeaa4fa172f8fa1a0949eaa ] This patch fixes a FLR bug on the SNET DPU rev 1 by setting the PCI_DEV_FLAGS_NO_FLR_RESET flag. As there is a quirk to avoid FLR (quirk_no_flr), I added a new quirk to check the rev ID before calling to quirk_no_flr. Without this patch, a SNET DPU rev 1 may hang when FLR is applied. Signed-off-by: Alvaro Karsz Acked-by: Bjorn Helgaas Message-Id: <20230110165638.123745-3-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 289ba6902e41..d8d3f817e95c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5403,6 +5403,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr); +/* FLR may cause the SolidRun SNET DPU (rev 0x1) to hang */ +static void quirk_no_flr_snet(struct pci_dev *dev) +{ + if (dev->revision == 0x1) + quirk_no_flr(dev); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLIDRUN, 0x1000, quirk_no_flr_snet); + static void quirk_no_ext_tags(struct pci_dev *pdev) { struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); From c96b07dca169ac58fb6afe16a01e68ff0db3c528 Mon Sep 17 00:00:00 2001 From: David Yang Date: Tue, 7 Feb 2023 12:33:16 +0800 Subject: [PATCH 349/553] HID: kye: Sort kye devices [ Upstream commit 8c7b79bc04abb67e7f5864e94286a800b42aa96c ] Sort kye devices by their Produce IDs. Signed-off-by: David Yang Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 2 +- drivers/hid/hid-kye.c | 66 ++++++++++++++++++++-------------------- drivers/hid/hid-quirks.c | 6 ++-- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1be454bafcb9..405d88b08908 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -717,10 +717,10 @@ #define USB_DEVICE_ID_KYE_GPEN_560 0x5003 #define USB_DEVICE_ID_KYE_EASYPEN_I405X 0x5010 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X 0x5011 -#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2 0x501a #define USB_DEVICE_ID_KYE_EASYPEN_M610X 0x5013 #define USB_DEVICE_ID_KYE_PENSKETCH_M912 0x5015 #define USB_DEVICE_ID_KYE_EASYPEN_M406XE 0x5019 +#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2 0x501A #define USB_VENDOR_ID_LABTEC 0x1020 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index da903138eee4..dc57e9d4a3e2 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -602,6 +602,18 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[74] = 0x08; } break; + case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE: + rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, + "Genius Gila Gaming Mouse"); + break; + case USB_DEVICE_ID_GENIUS_MANTICORE: + rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, + "Genius Manticore Keyboard"); + break; + case USB_DEVICE_ID_GENIUS_GX_IMPERATOR: + rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 83, + "Genius Gx Imperator Keyboard"); + break; case USB_DEVICE_ID_KYE_EASYPEN_I405X: if (*rsize == EASYPEN_I405X_RDESC_ORIG_SIZE) { rdesc = easypen_i405x_rdesc_fixed; @@ -638,18 +650,6 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, *rsize = sizeof(pensketch_m912_rdesc_fixed); } break; - case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE: - rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, - "Genius Gila Gaming Mouse"); - break; - case USB_DEVICE_ID_GENIUS_GX_IMPERATOR: - rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 83, - "Genius Gx Imperator Keyboard"); - break; - case USB_DEVICE_ID_GENIUS_MANTICORE: - rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, - "Genius Manticore Keyboard"); - break; } return rdesc; } @@ -717,18 +717,6 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) } switch (id->product) { - case USB_DEVICE_ID_KYE_EASYPEN_I405X: - case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: - case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2: - case USB_DEVICE_ID_KYE_EASYPEN_M610X: - case USB_DEVICE_ID_KYE_EASYPEN_M406XE: - case USB_DEVICE_ID_KYE_PENSKETCH_M912: - ret = kye_tablet_enable(hdev); - if (ret) { - hid_err(hdev, "tablet enabling failed\n"); - goto enabling_err; - } - break; case USB_DEVICE_ID_GENIUS_MANTICORE: /* * The manticore keyboard needs to have all the interfaces @@ -737,6 +725,18 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) if (hid_hw_open(hdev)) hid_hw_close(hdev); break; + case USB_DEVICE_ID_KYE_EASYPEN_I405X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_EASYPEN_M610X: + case USB_DEVICE_ID_KYE_PENSKETCH_M912: + case USB_DEVICE_ID_KYE_EASYPEN_M406XE: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2: + ret = kye_tablet_enable(hdev); + if (ret) { + hid_err(hdev, "tablet enabling failed\n"); + goto enabling_err; + } + break; } return 0; @@ -748,24 +748,24 @@ err: static const struct hid_device_id kye_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_GENIUS_MANTICORE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_GENIUS_GX_IMPERATOR) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_KYE_PENSKETCH_M912) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_GENIUS_GX_IMPERATOR) }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_GENIUS_MANTICORE) }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, - USB_DEVICE_ID_KYE_PENSKETCH_M912) }, + USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) }, { } }; MODULE_DEVICE_TABLE(hid, kye_devices); diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 60884066362a..debc49272a5c 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -107,12 +107,12 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_1f4a), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_INNOMEDIA, USB_DEVICE_ID_INNEX_GENESIS_ATARI), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, From 0fe6a97a5fea6363442bb83500fb473daedadcf8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 23 Mar 2023 20:40:22 +0100 Subject: [PATCH 350/553] usb: pci-quirks: Reduce the length of a spinlock section in usb_amd_find_chipset_info() [ Upstream commit c03ff66dc0e0cbad9ed0c29500843e1da8533118 ] 'info' is local to the function. There is no need to zeroing it within a spin_lock section. Moreover, there is no need to explicitly initialize the .need_pll_quirk field. Initialize the structure when defined and remove the now useless memset(). Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/08ee42fced6af6bd56892cd14f2464380ab071fa.1679600396.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/pci-quirks.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index ef08d68b9714..2665832f9add 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -207,8 +207,7 @@ EXPORT_SYMBOL_GPL(sb800_prefetch); static void usb_amd_find_chipset_info(void) { unsigned long flags; - struct amd_chipset_info info; - info.need_pll_quirk = false; + struct amd_chipset_info info = { }; spin_lock_irqsave(&amd_lock, flags); @@ -218,7 +217,6 @@ static void usb_amd_find_chipset_info(void) spin_unlock_irqrestore(&amd_lock, flags); return; } - memset(&info, 0, sizeof(info)); spin_unlock_irqrestore(&amd_lock, flags); if (!amd_chipset_sb_type_init(&info)) { From f4aae2afe2d5497820d0ced9e4cf8af30088d32c Mon Sep 17 00:00:00 2001 From: Mike Pastore Date: Sun, 7 May 2023 02:35:19 -0500 Subject: [PATCH 351/553] PCI: Delay after FLR of Solidigm P44 Pro NVMe [ Upstream commit 0ac448e0d29d6ba978684b3fa2e3ac7294ec2475 ] Prevent KVM hang when a Solidgm P44 Pro NVMe is passed through to a guest via IOMMU and the guest is subsequently rebooted. A similar issue was identified and patched by 51ba09452d11 ("PCI: Delay after FLR of Intel DC P3700 NVMe") and the same fix can be applied for this case. (Intel spun off their NAND and SSD business as Solidigm and sold it to SK Hynix in late 2021.) Link: https://lore.kernel.org/r/20230507073519.9737-1-mike@oobak.org Signed-off-by: Mike Pastore Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 10 ++++++---- include/linux/pci_ids.h | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d8d3f817e95c..92169dc71468 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4011,10 +4011,11 @@ static int nvme_disable_and_flr(struct pci_dev *dev, bool probe) } /* - * Intel DC P3700 NVMe controller will timeout waiting for ready status - * to change after NVMe enable if the driver starts interacting with the - * device too soon after FLR. A 250ms delay after FLR has heuristically - * proven to produce reliably working results for device assignment cases. + * Some NVMe controllers such as Intel DC P3700 and Solidigm P44 Pro will + * timeout waiting for ready status to change after NVMe enable if the driver + * starts interacting with the device too soon after FLR. A 250ms delay after + * FLR has heuristically proven to produce reliably working results for device + * assignment cases. */ static int delay_250ms_after_flr(struct pci_dev *dev, bool probe) { @@ -4101,6 +4102,7 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr }, { PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr }, { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr }, + { PCI_VENDOR_ID_SOLIDIGM, 0xf1ac, delay_250ms_after_flr }, { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, reset_chelsio_generic_dev }, { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 73cc1e7dd15a..9e9794d03c9f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -158,6 +158,8 @@ #define PCI_VENDOR_ID_LOONGSON 0x0014 +#define PCI_VENDOR_ID_SOLIDIGM 0x025e + #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a From 4b7ed2400e0d895a9920c80d63646728bb389baa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 21:35:38 +0200 Subject: [PATCH 352/553] x86/quirks: Include linux/pnp.h for arch_pnpbios_disabled() [ Upstream commit 056b44a4d10907ec8153863b2a0564e808ef1440 ] arch_pnpbios_disabled() is defined in architecture code on x86, but this does not include the appropriate header, causing a warning: arch/x86/kernel/platform-quirks.c:42:13: error: no previous prototype for 'arch_pnpbios_disabled' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann Signed-off-by: Dave Hansen Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/all/20230516193549.544673-10-arnd%40kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/platform-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b348a672f71d..b525fe6d6657 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include From 932a7651002be85bada95f0277b9ec5d6aa6d687 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 3 Feb 2023 15:57:59 +0200 Subject: [PATCH 353/553] thunderbolt: Log function name of the called quirk [ Upstream commit f14d177e0be652ef7b265753f08f2a7d31935668 ] This is useful when debugging whether a quirk has been matched or not. Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 638cb5fb22c1..13719a851c71 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -130,6 +130,7 @@ void tb_check_quirks(struct tb_switch *sw) if (q->device && q->device != sw->device) continue; + tb_sw_dbg(sw, "running %ps\n", q->hook); q->hook(sw); } } From caa7ff1d7d514a668fb6e52ef50cbee17a6f4fc1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 27 Feb 2023 12:45:09 +0200 Subject: [PATCH 354/553] thunderbolt: Add debug log for link controller power quirk [ Upstream commit ccdb0900a0c3b0b56af5f547cceb64ee8d09483f ] Add a debug log to this quirk as well so we can see what quirks have been applied when debugging. Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 13719a851c71..e81de9c30eac 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -10,6 +10,7 @@ static void quirk_force_power_link(struct tb_switch *sw) { sw->quirks |= QUIRK_FORCE_POWER_LINK_CONTROLLER; + tb_sw_dbg(sw, "forcing power to link controller\n"); } static void quirk_dp_credit_allocation(struct tb_switch *sw) From 89a9196aec6b666bd69d571a82569b24baf9d66c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 11 Jun 2023 18:19:23 +0100 Subject: [PATCH 355/553] PCI: Execute quirk_enable_clear_retrain_link() earlier [ Upstream commit 07a8d698de50c4740ac6f709c43e23a6da6e4dbc ] Make quirk_enable_clear_retrain_link() an early quirk so that any later fixups can rely on dev->clear_retrain_link to have been already initialised. [bhelgaas: reorder to just before it becomes possible to call pcie_retrain_link() earlier] Link: https://lore.kernel.org/r/alpine.DEB.2.21.2305310049000.59226@angie.orcam.me.uk Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 92169dc71468..3959ea7b106b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2425,9 +2425,9 @@ static void quirk_enable_clear_retrain_link(struct pci_dev *dev) dev->clear_retrain_link = 1; pci_info(dev, "Enable PCIe Retrain Link quirk\n"); } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe110, quirk_enable_clear_retrain_link); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe111, quirk_enable_clear_retrain_link); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PERICOM, 0xe130, quirk_enable_clear_retrain_link); static void fixup_rev1_53c810(struct pci_dev *dev) { From 87709f7ecdb884ab75e3de6f951dffe500ee95b2 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Fri, 23 Jun 2023 17:00:02 -0700 Subject: [PATCH 356/553] PCI: switchtec: Use normal comment style [ Upstream commit 846691f5483d61259db2f4d6a3dce8b98d518794 ] Use normal comment style '/* */' for device ID description. Link: https://lore.kernel.org/r/20230624000003.2315364-2-kelvin.cao@microchip.com Signed-off-by: Kelvin Cao Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Stable-dep-of: 0fb53e64705a ("PCI: switchtec: Add support for PCIe Gen5 devices") Signed-off-by: Sasha Levin --- drivers/pci/switch/switchtec.c | 114 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index d05a482639e3..f0322e9dbee9 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1740,63 +1740,63 @@ static void switchtec_pci_remove(struct pci_dev *pdev) } static const struct pci_device_id switchtec_pci_tbl[] = { - SWITCHTEC_PCI_DEVICE(0x8531, SWITCHTEC_GEN3), //PFX 24xG3 - SWITCHTEC_PCI_DEVICE(0x8532, SWITCHTEC_GEN3), //PFX 32xG3 - SWITCHTEC_PCI_DEVICE(0x8533, SWITCHTEC_GEN3), //PFX 48xG3 - SWITCHTEC_PCI_DEVICE(0x8534, SWITCHTEC_GEN3), //PFX 64xG3 - SWITCHTEC_PCI_DEVICE(0x8535, SWITCHTEC_GEN3), //PFX 80xG3 - SWITCHTEC_PCI_DEVICE(0x8536, SWITCHTEC_GEN3), //PFX 96xG3 - SWITCHTEC_PCI_DEVICE(0x8541, SWITCHTEC_GEN3), //PSX 24xG3 - SWITCHTEC_PCI_DEVICE(0x8542, SWITCHTEC_GEN3), //PSX 32xG3 - SWITCHTEC_PCI_DEVICE(0x8543, SWITCHTEC_GEN3), //PSX 48xG3 - SWITCHTEC_PCI_DEVICE(0x8544, SWITCHTEC_GEN3), //PSX 64xG3 - SWITCHTEC_PCI_DEVICE(0x8545, SWITCHTEC_GEN3), //PSX 80xG3 - SWITCHTEC_PCI_DEVICE(0x8546, SWITCHTEC_GEN3), //PSX 96xG3 - SWITCHTEC_PCI_DEVICE(0x8551, SWITCHTEC_GEN3), //PAX 24XG3 - SWITCHTEC_PCI_DEVICE(0x8552, SWITCHTEC_GEN3), //PAX 32XG3 - SWITCHTEC_PCI_DEVICE(0x8553, SWITCHTEC_GEN3), //PAX 48XG3 - SWITCHTEC_PCI_DEVICE(0x8554, SWITCHTEC_GEN3), //PAX 64XG3 - SWITCHTEC_PCI_DEVICE(0x8555, SWITCHTEC_GEN3), //PAX 80XG3 - SWITCHTEC_PCI_DEVICE(0x8556, SWITCHTEC_GEN3), //PAX 96XG3 - SWITCHTEC_PCI_DEVICE(0x8561, SWITCHTEC_GEN3), //PFXL 24XG3 - SWITCHTEC_PCI_DEVICE(0x8562, SWITCHTEC_GEN3), //PFXL 32XG3 - SWITCHTEC_PCI_DEVICE(0x8563, SWITCHTEC_GEN3), //PFXL 48XG3 - SWITCHTEC_PCI_DEVICE(0x8564, SWITCHTEC_GEN3), //PFXL 64XG3 - SWITCHTEC_PCI_DEVICE(0x8565, SWITCHTEC_GEN3), //PFXL 80XG3 - SWITCHTEC_PCI_DEVICE(0x8566, SWITCHTEC_GEN3), //PFXL 96XG3 - SWITCHTEC_PCI_DEVICE(0x8571, SWITCHTEC_GEN3), //PFXI 24XG3 - SWITCHTEC_PCI_DEVICE(0x8572, SWITCHTEC_GEN3), //PFXI 32XG3 - SWITCHTEC_PCI_DEVICE(0x8573, SWITCHTEC_GEN3), //PFXI 48XG3 - SWITCHTEC_PCI_DEVICE(0x8574, SWITCHTEC_GEN3), //PFXI 64XG3 - SWITCHTEC_PCI_DEVICE(0x8575, SWITCHTEC_GEN3), //PFXI 80XG3 - SWITCHTEC_PCI_DEVICE(0x8576, SWITCHTEC_GEN3), //PFXI 96XG3 - SWITCHTEC_PCI_DEVICE(0x4000, SWITCHTEC_GEN4), //PFX 100XG4 - SWITCHTEC_PCI_DEVICE(0x4084, SWITCHTEC_GEN4), //PFX 84XG4 - SWITCHTEC_PCI_DEVICE(0x4068, SWITCHTEC_GEN4), //PFX 68XG4 - SWITCHTEC_PCI_DEVICE(0x4052, SWITCHTEC_GEN4), //PFX 52XG4 - SWITCHTEC_PCI_DEVICE(0x4036, SWITCHTEC_GEN4), //PFX 36XG4 - SWITCHTEC_PCI_DEVICE(0x4028, SWITCHTEC_GEN4), //PFX 28XG4 - SWITCHTEC_PCI_DEVICE(0x4100, SWITCHTEC_GEN4), //PSX 100XG4 - SWITCHTEC_PCI_DEVICE(0x4184, SWITCHTEC_GEN4), //PSX 84XG4 - SWITCHTEC_PCI_DEVICE(0x4168, SWITCHTEC_GEN4), //PSX 68XG4 - SWITCHTEC_PCI_DEVICE(0x4152, SWITCHTEC_GEN4), //PSX 52XG4 - SWITCHTEC_PCI_DEVICE(0x4136, SWITCHTEC_GEN4), //PSX 36XG4 - SWITCHTEC_PCI_DEVICE(0x4128, SWITCHTEC_GEN4), //PSX 28XG4 - SWITCHTEC_PCI_DEVICE(0x4200, SWITCHTEC_GEN4), //PAX 100XG4 - SWITCHTEC_PCI_DEVICE(0x4284, SWITCHTEC_GEN4), //PAX 84XG4 - SWITCHTEC_PCI_DEVICE(0x4268, SWITCHTEC_GEN4), //PAX 68XG4 - SWITCHTEC_PCI_DEVICE(0x4252, SWITCHTEC_GEN4), //PAX 52XG4 - SWITCHTEC_PCI_DEVICE(0x4236, SWITCHTEC_GEN4), //PAX 36XG4 - SWITCHTEC_PCI_DEVICE(0x4228, SWITCHTEC_GEN4), //PAX 28XG4 - SWITCHTEC_PCI_DEVICE(0x4352, SWITCHTEC_GEN4), //PFXA 52XG4 - SWITCHTEC_PCI_DEVICE(0x4336, SWITCHTEC_GEN4), //PFXA 36XG4 - SWITCHTEC_PCI_DEVICE(0x4328, SWITCHTEC_GEN4), //PFXA 28XG4 - SWITCHTEC_PCI_DEVICE(0x4452, SWITCHTEC_GEN4), //PSXA 52XG4 - SWITCHTEC_PCI_DEVICE(0x4436, SWITCHTEC_GEN4), //PSXA 36XG4 - SWITCHTEC_PCI_DEVICE(0x4428, SWITCHTEC_GEN4), //PSXA 28XG4 - SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), //PAXA 52XG4 - SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), //PAXA 36XG4 - SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), //PAXA 28XG4 + SWITCHTEC_PCI_DEVICE(0x8531, SWITCHTEC_GEN3), /* PFX 24xG3 */ + SWITCHTEC_PCI_DEVICE(0x8532, SWITCHTEC_GEN3), /* PFX 32xG3 */ + SWITCHTEC_PCI_DEVICE(0x8533, SWITCHTEC_GEN3), /* PFX 48xG3 */ + SWITCHTEC_PCI_DEVICE(0x8534, SWITCHTEC_GEN3), /* PFX 64xG3 */ + SWITCHTEC_PCI_DEVICE(0x8535, SWITCHTEC_GEN3), /* PFX 80xG3 */ + SWITCHTEC_PCI_DEVICE(0x8536, SWITCHTEC_GEN3), /* PFX 96xG3 */ + SWITCHTEC_PCI_DEVICE(0x8541, SWITCHTEC_GEN3), /* PSX 24xG3 */ + SWITCHTEC_PCI_DEVICE(0x8542, SWITCHTEC_GEN3), /* PSX 32xG3 */ + SWITCHTEC_PCI_DEVICE(0x8543, SWITCHTEC_GEN3), /* PSX 48xG3 */ + SWITCHTEC_PCI_DEVICE(0x8544, SWITCHTEC_GEN3), /* PSX 64xG3 */ + SWITCHTEC_PCI_DEVICE(0x8545, SWITCHTEC_GEN3), /* PSX 80xG3 */ + SWITCHTEC_PCI_DEVICE(0x8546, SWITCHTEC_GEN3), /* PSX 96xG3 */ + SWITCHTEC_PCI_DEVICE(0x8551, SWITCHTEC_GEN3), /* PAX 24XG3 */ + SWITCHTEC_PCI_DEVICE(0x8552, SWITCHTEC_GEN3), /* PAX 32XG3 */ + SWITCHTEC_PCI_DEVICE(0x8553, SWITCHTEC_GEN3), /* PAX 48XG3 */ + SWITCHTEC_PCI_DEVICE(0x8554, SWITCHTEC_GEN3), /* PAX 64XG3 */ + SWITCHTEC_PCI_DEVICE(0x8555, SWITCHTEC_GEN3), /* PAX 80XG3 */ + SWITCHTEC_PCI_DEVICE(0x8556, SWITCHTEC_GEN3), /* PAX 96XG3 */ + SWITCHTEC_PCI_DEVICE(0x8561, SWITCHTEC_GEN3), /* PFXL 24XG3 */ + SWITCHTEC_PCI_DEVICE(0x8562, SWITCHTEC_GEN3), /* PFXL 32XG3 */ + SWITCHTEC_PCI_DEVICE(0x8563, SWITCHTEC_GEN3), /* PFXL 48XG3 */ + SWITCHTEC_PCI_DEVICE(0x8564, SWITCHTEC_GEN3), /* PFXL 64XG3 */ + SWITCHTEC_PCI_DEVICE(0x8565, SWITCHTEC_GEN3), /* PFXL 80XG3 */ + SWITCHTEC_PCI_DEVICE(0x8566, SWITCHTEC_GEN3), /* PFXL 96XG3 */ + SWITCHTEC_PCI_DEVICE(0x8571, SWITCHTEC_GEN3), /* PFXI 24XG3 */ + SWITCHTEC_PCI_DEVICE(0x8572, SWITCHTEC_GEN3), /* PFXI 32XG3 */ + SWITCHTEC_PCI_DEVICE(0x8573, SWITCHTEC_GEN3), /* PFXI 48XG3 */ + SWITCHTEC_PCI_DEVICE(0x8574, SWITCHTEC_GEN3), /* PFXI 64XG3 */ + SWITCHTEC_PCI_DEVICE(0x8575, SWITCHTEC_GEN3), /* PFXI 80XG3 */ + SWITCHTEC_PCI_DEVICE(0x8576, SWITCHTEC_GEN3), /* PFXI 96XG3 */ + SWITCHTEC_PCI_DEVICE(0x4000, SWITCHTEC_GEN4), /* PFX 100XG4 */ + SWITCHTEC_PCI_DEVICE(0x4084, SWITCHTEC_GEN4), /* PFX 84XG4 */ + SWITCHTEC_PCI_DEVICE(0x4068, SWITCHTEC_GEN4), /* PFX 68XG4 */ + SWITCHTEC_PCI_DEVICE(0x4052, SWITCHTEC_GEN4), /* PFX 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4036, SWITCHTEC_GEN4), /* PFX 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4028, SWITCHTEC_GEN4), /* PFX 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4100, SWITCHTEC_GEN4), /* PSX 100XG4 */ + SWITCHTEC_PCI_DEVICE(0x4184, SWITCHTEC_GEN4), /* PSX 84XG4 */ + SWITCHTEC_PCI_DEVICE(0x4168, SWITCHTEC_GEN4), /* PSX 68XG4 */ + SWITCHTEC_PCI_DEVICE(0x4152, SWITCHTEC_GEN4), /* PSX 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4136, SWITCHTEC_GEN4), /* PSX 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4128, SWITCHTEC_GEN4), /* PSX 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4200, SWITCHTEC_GEN4), /* PAX 100XG4 */ + SWITCHTEC_PCI_DEVICE(0x4284, SWITCHTEC_GEN4), /* PAX 84XG4 */ + SWITCHTEC_PCI_DEVICE(0x4268, SWITCHTEC_GEN4), /* PAX 68XG4 */ + SWITCHTEC_PCI_DEVICE(0x4252, SWITCHTEC_GEN4), /* PAX 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4236, SWITCHTEC_GEN4), /* PAX 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4228, SWITCHTEC_GEN4), /* PAX 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4352, SWITCHTEC_GEN4), /* PFXA 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4336, SWITCHTEC_GEN4), /* PFXA 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4328, SWITCHTEC_GEN4), /* PFXA 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4452, SWITCHTEC_GEN4), /* PSXA 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4436, SWITCHTEC_GEN4), /* PSXA 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4428, SWITCHTEC_GEN4), /* PSXA 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), /* PAXA 52XG4 */ + SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), /* PAXA 36XG4 */ + SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), /* PAXA 28XG4 */ {0} }; MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl); From bbdfa144820566e142b90018cab7b8214c54e418 Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Fri, 23 Jun 2023 17:00:03 -0700 Subject: [PATCH 357/553] PCI: switchtec: Add support for PCIe Gen5 devices [ Upstream commit 0fb53e64705ae0fabd9593102e0f0e6812968802 ] Advertise support of Gen5 devices in the driver's device ID table and add the same IDs for the switchtec quirks. Also update driver code to accommodate them. Link: https://lore.kernel.org/r/20230624000003.2315364-3-kelvin.cao@microchip.com Signed-off-by: Kelvin Cao Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 36 ++++++++++++++++++++++++++++ drivers/pci/switch/switchtec.c | 44 ++++++++++++++++++++++++++++++---- include/linux/switchtec.h | 1 + 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 3959ea7b106b..47099d00fcf1 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5818,6 +5818,42 @@ SWITCHTEC_QUIRK(0x4428); /* PSXA 28XG4 */ SWITCHTEC_QUIRK(0x4552); /* PAXA 52XG4 */ SWITCHTEC_QUIRK(0x4536); /* PAXA 36XG4 */ SWITCHTEC_QUIRK(0x4528); /* PAXA 28XG4 */ +SWITCHTEC_QUIRK(0x5000); /* PFX 100XG5 */ +SWITCHTEC_QUIRK(0x5084); /* PFX 84XG5 */ +SWITCHTEC_QUIRK(0x5068); /* PFX 68XG5 */ +SWITCHTEC_QUIRK(0x5052); /* PFX 52XG5 */ +SWITCHTEC_QUIRK(0x5036); /* PFX 36XG5 */ +SWITCHTEC_QUIRK(0x5028); /* PFX 28XG5 */ +SWITCHTEC_QUIRK(0x5100); /* PSX 100XG5 */ +SWITCHTEC_QUIRK(0x5184); /* PSX 84XG5 */ +SWITCHTEC_QUIRK(0x5168); /* PSX 68XG5 */ +SWITCHTEC_QUIRK(0x5152); /* PSX 52XG5 */ +SWITCHTEC_QUIRK(0x5136); /* PSX 36XG5 */ +SWITCHTEC_QUIRK(0x5128); /* PSX 28XG5 */ +SWITCHTEC_QUIRK(0x5200); /* PAX 100XG5 */ +SWITCHTEC_QUIRK(0x5284); /* PAX 84XG5 */ +SWITCHTEC_QUIRK(0x5268); /* PAX 68XG5 */ +SWITCHTEC_QUIRK(0x5252); /* PAX 52XG5 */ +SWITCHTEC_QUIRK(0x5236); /* PAX 36XG5 */ +SWITCHTEC_QUIRK(0x5228); /* PAX 28XG5 */ +SWITCHTEC_QUIRK(0x5300); /* PFXA 100XG5 */ +SWITCHTEC_QUIRK(0x5384); /* PFXA 84XG5 */ +SWITCHTEC_QUIRK(0x5368); /* PFXA 68XG5 */ +SWITCHTEC_QUIRK(0x5352); /* PFXA 52XG5 */ +SWITCHTEC_QUIRK(0x5336); /* PFXA 36XG5 */ +SWITCHTEC_QUIRK(0x5328); /* PFXA 28XG5 */ +SWITCHTEC_QUIRK(0x5400); /* PSXA 100XG5 */ +SWITCHTEC_QUIRK(0x5484); /* PSXA 84XG5 */ +SWITCHTEC_QUIRK(0x5468); /* PSXA 68XG5 */ +SWITCHTEC_QUIRK(0x5452); /* PSXA 52XG5 */ +SWITCHTEC_QUIRK(0x5436); /* PSXA 36XG5 */ +SWITCHTEC_QUIRK(0x5428); /* PSXA 28XG5 */ +SWITCHTEC_QUIRK(0x5500); /* PAXA 100XG5 */ +SWITCHTEC_QUIRK(0x5584); /* PAXA 84XG5 */ +SWITCHTEC_QUIRK(0x5568); /* PAXA 68XG5 */ +SWITCHTEC_QUIRK(0x5552); /* PAXA 52XG5 */ +SWITCHTEC_QUIRK(0x5536); /* PAXA 36XG5 */ +SWITCHTEC_QUIRK(0x5528); /* PAXA 28XG5 */ /* * The PLX NTB uses devfn proxy IDs to move TLPs between NT endpoints. diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index f0322e9dbee9..332af6938d7f 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -372,7 +372,7 @@ static ssize_t field ## _show(struct device *dev, \ if (stdev->gen == SWITCHTEC_GEN3) \ return io_string_show(buf, &si->gen3.field, \ sizeof(si->gen3.field)); \ - else if (stdev->gen == SWITCHTEC_GEN4) \ + else if (stdev->gen >= SWITCHTEC_GEN4) \ return io_string_show(buf, &si->gen4.field, \ sizeof(si->gen4.field)); \ else \ @@ -663,7 +663,7 @@ static int ioctl_flash_info(struct switchtec_dev *stdev, if (stdev->gen == SWITCHTEC_GEN3) { info.flash_length = ioread32(&fi->gen3.flash_length); info.num_partitions = SWITCHTEC_NUM_PARTITIONS_GEN3; - } else if (stdev->gen == SWITCHTEC_GEN4) { + } else if (stdev->gen >= SWITCHTEC_GEN4) { info.flash_length = ioread32(&fi->gen4.flash_length); info.num_partitions = SWITCHTEC_NUM_PARTITIONS_GEN4; } else { @@ -870,7 +870,7 @@ static int ioctl_flash_part_info(struct switchtec_dev *stdev, ret = flash_part_info_gen3(stdev, &info); if (ret) return ret; - } else if (stdev->gen == SWITCHTEC_GEN4) { + } else if (stdev->gen >= SWITCHTEC_GEN4) { ret = flash_part_info_gen4(stdev, &info); if (ret) return ret; @@ -1606,7 +1606,7 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, if (stdev->gen == SWITCHTEC_GEN3) part_id = &stdev->mmio_sys_info->gen3.partition_id; - else if (stdev->gen == SWITCHTEC_GEN4) + else if (stdev->gen >= SWITCHTEC_GEN4) part_id = &stdev->mmio_sys_info->gen4.partition_id; else return -EOPNOTSUPP; @@ -1797,6 +1797,42 @@ static const struct pci_device_id switchtec_pci_tbl[] = { SWITCHTEC_PCI_DEVICE(0x4552, SWITCHTEC_GEN4), /* PAXA 52XG4 */ SWITCHTEC_PCI_DEVICE(0x4536, SWITCHTEC_GEN4), /* PAXA 36XG4 */ SWITCHTEC_PCI_DEVICE(0x4528, SWITCHTEC_GEN4), /* PAXA 28XG4 */ + SWITCHTEC_PCI_DEVICE(0x5000, SWITCHTEC_GEN5), /* PFX 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5084, SWITCHTEC_GEN5), /* PFX 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5068, SWITCHTEC_GEN5), /* PFX 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5052, SWITCHTEC_GEN5), /* PFX 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5036, SWITCHTEC_GEN5), /* PFX 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5028, SWITCHTEC_GEN5), /* PFX 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5100, SWITCHTEC_GEN5), /* PSX 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5184, SWITCHTEC_GEN5), /* PSX 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5168, SWITCHTEC_GEN5), /* PSX 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5152, SWITCHTEC_GEN5), /* PSX 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5136, SWITCHTEC_GEN5), /* PSX 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5128, SWITCHTEC_GEN5), /* PSX 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5200, SWITCHTEC_GEN5), /* PAX 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5284, SWITCHTEC_GEN5), /* PAX 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5268, SWITCHTEC_GEN5), /* PAX 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5252, SWITCHTEC_GEN5), /* PAX 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5236, SWITCHTEC_GEN5), /* PAX 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5228, SWITCHTEC_GEN5), /* PAX 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5300, SWITCHTEC_GEN5), /* PFXA 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5384, SWITCHTEC_GEN5), /* PFXA 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5368, SWITCHTEC_GEN5), /* PFXA 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5352, SWITCHTEC_GEN5), /* PFXA 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5336, SWITCHTEC_GEN5), /* PFXA 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5328, SWITCHTEC_GEN5), /* PFXA 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5400, SWITCHTEC_GEN5), /* PSXA 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5484, SWITCHTEC_GEN5), /* PSXA 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5468, SWITCHTEC_GEN5), /* PSXA 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5452, SWITCHTEC_GEN5), /* PSXA 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5436, SWITCHTEC_GEN5), /* PSXA 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5428, SWITCHTEC_GEN5), /* PSXA 28XG5 */ + SWITCHTEC_PCI_DEVICE(0x5500, SWITCHTEC_GEN5), /* PAXA 100XG5 */ + SWITCHTEC_PCI_DEVICE(0x5584, SWITCHTEC_GEN5), /* PAXA 84XG5 */ + SWITCHTEC_PCI_DEVICE(0x5568, SWITCHTEC_GEN5), /* PAXA 68XG5 */ + SWITCHTEC_PCI_DEVICE(0x5552, SWITCHTEC_GEN5), /* PAXA 52XG5 */ + SWITCHTEC_PCI_DEVICE(0x5536, SWITCHTEC_GEN5), /* PAXA 36XG5 */ + SWITCHTEC_PCI_DEVICE(0x5528, SWITCHTEC_GEN5), /* PAXA 28XG5 */ {0} }; MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl); diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 48fabe36509e..8d8fac1626bd 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -41,6 +41,7 @@ enum { enum switchtec_gen { SWITCHTEC_GEN3, SWITCHTEC_GEN4, + SWITCHTEC_GEN5, }; struct mrpc_regs { From 4d74cb9cdfd51e6d854f8da54113e716335cb2ca Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:02:16 +0930 Subject: [PATCH 358/553] ALSA: scarlett2: Move USB IDs out from device_info struct [ Upstream commit d98cc489029dba4d99714c2e8ec4f5ba249f6851 ] By moving the USB IDs from the device_info struct into scarlett2_devices[], that will allow for devices with different USB IDs to share the same device_info. Tested-by: Philippe Perrot Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/8263368e8d49e6fcebc709817bd82ab79b404468.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Stable-dep-of: b9a98cdd3ac7 ("ALSA: scarlett2: Add support for Clarett 8Pre USB") Signed-off-by: Sasha Levin --- sound/usb/mixer_scarlett_gen2.c | 63 ++++++++++++--------------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 1bcb05c73e0a..2668bc1b918b 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -317,8 +317,6 @@ struct scarlett2_mux_entry { }; struct scarlett2_device_info { - u32 usb_id; /* USB device identifier */ - /* Gen 3 devices have an internal MSD mode switch that needs * to be disabled in order to access the full functionality of * the device. @@ -440,8 +438,6 @@ struct scarlett2_data { /*** Model-specific data ***/ static const struct scarlett2_device_info s6i6_gen2_info = { - .usb_id = USB_ID(0x1235, 0x8203), - .config_set = SCARLETT2_CONFIG_SET_GEN_2, .level_input_count = 2, .pad_input_count = 2, @@ -486,8 +482,6 @@ static const struct scarlett2_device_info s6i6_gen2_info = { }; static const struct scarlett2_device_info s18i8_gen2_info = { - .usb_id = USB_ID(0x1235, 0x8204), - .config_set = SCARLETT2_CONFIG_SET_GEN_2, .level_input_count = 2, .pad_input_count = 4, @@ -535,8 +529,6 @@ static const struct scarlett2_device_info s18i8_gen2_info = { }; static const struct scarlett2_device_info s18i20_gen2_info = { - .usb_id = USB_ID(0x1235, 0x8201), - .config_set = SCARLETT2_CONFIG_SET_GEN_2, .line_out_hw_vol = 1, @@ -589,8 +581,6 @@ static const struct scarlett2_device_info s18i20_gen2_info = { }; static const struct scarlett2_device_info solo_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8211), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_NO_MIXER, .level_input_count = 1, @@ -602,8 +592,6 @@ static const struct scarlett2_device_info solo_gen3_info = { }; static const struct scarlett2_device_info s2i2_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8210), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_NO_MIXER, .level_input_count = 2, @@ -614,8 +602,6 @@ static const struct scarlett2_device_info s2i2_gen3_info = { }; static const struct scarlett2_device_info s4i4_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8212), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .level_input_count = 2, @@ -660,8 +646,6 @@ static const struct scarlett2_device_info s4i4_gen3_info = { }; static const struct scarlett2_device_info s8i6_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8213), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .level_input_count = 2, @@ -713,8 +697,6 @@ static const struct scarlett2_device_info s8i6_gen3_info = { }; static const struct scarlett2_device_info s18i8_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8214), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .line_out_hw_vol = 1, @@ -783,8 +765,6 @@ static const struct scarlett2_device_info s18i8_gen3_info = { }; static const struct scarlett2_device_info s18i20_gen3_info = { - .usb_id = USB_ID(0x1235, 0x8215), - .has_msd_mode = 1, .config_set = SCARLETT2_CONFIG_SET_GEN_3, .line_out_hw_vol = 1, @@ -848,8 +828,6 @@ static const struct scarlett2_device_info s18i20_gen3_info = { }; static const struct scarlett2_device_info clarett_8pre_info = { - .usb_id = USB_ID(0x1235, 0x820c), - .config_set = SCARLETT2_CONFIG_SET_CLARETT, .line_out_hw_vol = 1, .level_input_count = 2, @@ -902,25 +880,30 @@ static const struct scarlett2_device_info clarett_8pre_info = { } }, }; -static const struct scarlett2_device_info *scarlett2_devices[] = { +struct scarlett2_device_entry { + const u32 usb_id; /* USB device identifier */ + const struct scarlett2_device_info *info; +}; + +static const struct scarlett2_device_entry scarlett2_devices[] = { /* Supported Gen 2 devices */ - &s6i6_gen2_info, - &s18i8_gen2_info, - &s18i20_gen2_info, + { USB_ID(0x1235, 0x8203), &s6i6_gen2_info }, + { USB_ID(0x1235, 0x8204), &s18i8_gen2_info }, + { USB_ID(0x1235, 0x8201), &s18i20_gen2_info }, /* Supported Gen 3 devices */ - &solo_gen3_info, - &s2i2_gen3_info, - &s4i4_gen3_info, - &s8i6_gen3_info, - &s18i8_gen3_info, - &s18i20_gen3_info, + { USB_ID(0x1235, 0x8211), &solo_gen3_info }, + { USB_ID(0x1235, 0x8210), &s2i2_gen3_info }, + { USB_ID(0x1235, 0x8212), &s4i4_gen3_info }, + { USB_ID(0x1235, 0x8213), &s8i6_gen3_info }, + { USB_ID(0x1235, 0x8214), &s18i8_gen3_info }, + { USB_ID(0x1235, 0x8215), &s18i20_gen3_info }, /* Supported Clarett+ devices */ - &clarett_8pre_info, + { USB_ID(0x1235, 0x820c), &clarett_8pre_info }, /* End of list */ - NULL + { 0, NULL }, }; /* get the starting port index number for a given port type/direction */ @@ -4149,17 +4132,17 @@ static int scarlett2_init_notify(struct usb_mixer_interface *mixer) static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) { - const struct scarlett2_device_info **info = scarlett2_devices; + const struct scarlett2_device_entry *entry = scarlett2_devices; int err; - /* Find device in scarlett2_devices */ - while (*info && (*info)->usb_id != mixer->chip->usb_id) - info++; - if (!*info) + /* Find entry in scarlett2_devices */ + while (entry->usb_id && entry->usb_id != mixer->chip->usb_id) + entry++; + if (!entry->usb_id) return -EINVAL; /* Initialise private data */ - err = scarlett2_init_private(mixer, *info); + err = scarlett2_init_private(mixer, entry->info); if (err < 0) return err; From e2b8480b709d48ca1723eed7258f26a0df9a1965 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:02:37 +0930 Subject: [PATCH 359/553] ALSA: scarlett2: Add support for Clarett 8Pre USB [ Upstream commit b9a98cdd3ac7b80d8ea0f6acd81c88ad3d8bcb4a ] The Clarett 8Pre USB works the same as the Clarett+ 8Pre, only the USB ID is different. Tested-by: Philippe Perrot Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/e59f47b29e2037f031b56bde10474c6e96e31ba5.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 1 + sound/usb/mixer_scarlett_gen2.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 1f32e3ae3aa3..b122d7aedb44 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3447,6 +3447,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8213): /* Focusrite Scarlett 8i6 3rd Gen */ case USB_ID(0x1235, 0x8214): /* Focusrite Scarlett 18i8 3rd Gen */ case USB_ID(0x1235, 0x8215): /* Focusrite Scarlett 18i20 3rd Gen */ + case USB_ID(0x1235, 0x8208): /* Focusrite Clarett 8Pre USB */ case USB_ID(0x1235, 0x820c): /* Focusrite Clarett+ 8Pre */ err = snd_scarlett_gen2_init(mixer); break; diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 2668bc1b918b..f949d22da382 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1,13 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Focusrite Scarlett Gen 2/3 and Clarett+ Driver for ALSA + * Focusrite Scarlett Gen 2/3 and Clarett USB/Clarett+ Driver for ALSA * * Supported models: * - 6i6/18i8/18i20 Gen 2 * - Solo/2i2/4i4/8i6/18i8/18i20 Gen 3 + * - Clarett 8Pre USB * - Clarett+ 8Pre * - * Copyright (c) 2018-2022 by Geoffrey D. Bennett + * Copyright (c) 2018-2023 by Geoffrey D. Bennett * Copyright (c) 2020-2021 by Vladimir Sadovnikov * Copyright (c) 2022 by Christian Colglazier * @@ -56,6 +57,9 @@ * Support for Clarett+ 8Pre added in Aug 2022 by Christian * Colglazier. * + * Support for Clarett 8Pre USB added in Sep 2023 (thanks to Philippe + * Perrot for confirmation). + * * This ALSA mixer gives access to (model-dependent): * - input, output, mixer-matrix muxes * - mixer-matrix gain stages @@ -899,7 +903,8 @@ static const struct scarlett2_device_entry scarlett2_devices[] = { { USB_ID(0x1235, 0x8214), &s18i8_gen3_info }, { USB_ID(0x1235, 0x8215), &s18i20_gen3_info }, - /* Supported Clarett+ devices */ + /* Supported Clarett USB/Clarett+ devices */ + { USB_ID(0x1235, 0x8208), &clarett_8pre_info }, { USB_ID(0x1235, 0x820c), &clarett_8pre_info }, /* End of list */ From 0f4048e1a0c6e9d3d31ce5b684600fd137cebfca Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 26 Sep 2023 15:25:32 +0200 Subject: [PATCH 360/553] ASoC: ti: Convert Pandora ASoC to GPIO descriptors [ Upstream commit 319e6ac143b9e9048e527ab9dd2aabb8fdf3d60f ] The Pandora uses GPIO descriptors pretty much exclusively, but not for ASoC, so let's fix it. Register the pins in a descriptor table in the machine since the ASoC device is not using device tree. Use static locals for the GPIO descriptors because I'm not able to experient with better state storage on any real hardware. Others using the Pandora can come afterwards and improve this. Signed-off-by: Linus Walleij Acked-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230926-descriptors-asoc-ti-v1-4-60cf4f8adbc5@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/pdata-quirks.c | 10 +++++ sound/soc/ti/omap3pandora.c | 63 +++++++++++------------------- 2 files changed, 33 insertions(+), 40 deletions(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 9deba798cc91..44da1e14a374 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -257,9 +257,19 @@ static struct platform_device pandora_backlight = { .id = -1, }; +static struct gpiod_lookup_table pandora_soc_audio_gpios = { + .dev_id = "soc-audio", + .table = { + GPIO_LOOKUP("gpio-112-127", 6, "dac", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-0-15", 14, "amp", GPIO_ACTIVE_HIGH), + { } + }, +}; + static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); + gpiod_add_lookup_table(&pandora_soc_audio_gpios); } #endif /* CONFIG_ARCH_OMAP3 */ diff --git a/sound/soc/ti/omap3pandora.c b/sound/soc/ti/omap3pandora.c index a287e9747c2a..fa92ed97dfe3 100644 --- a/sound/soc/ti/omap3pandora.c +++ b/sound/soc/ti/omap3pandora.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include @@ -21,12 +21,11 @@ #include "omap-mcbsp.h" -#define OMAP3_PANDORA_DAC_POWER_GPIO 118 -#define OMAP3_PANDORA_AMP_POWER_GPIO 14 - #define PREFIX "ASoC omap3pandora: " static struct regulator *omap3pandora_dac_reg; +static struct gpio_desc *dac_power_gpio; +static struct gpio_desc *amp_power_gpio; static int omap3pandora_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -78,9 +77,9 @@ static int omap3pandora_dac_event(struct snd_soc_dapm_widget *w, return ret; } mdelay(1); - gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 1); + gpiod_set_value(dac_power_gpio, 1); } else { - gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 0); + gpiod_set_value(dac_power_gpio, 0); mdelay(1); regulator_disable(omap3pandora_dac_reg); } @@ -92,9 +91,9 @@ static int omap3pandora_hp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { if (SND_SOC_DAPM_EVENT_ON(event)) - gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 1); + gpiod_set_value(amp_power_gpio, 1); else - gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 0); + gpiod_set_value(amp_power_gpio, 0); return 0; } @@ -229,35 +228,10 @@ static int __init omap3pandora_soc_init(void) pr_info("OMAP3 Pandora SoC init\n"); - ret = gpio_request(OMAP3_PANDORA_DAC_POWER_GPIO, "dac_power"); - if (ret) { - pr_err(PREFIX "Failed to get DAC power GPIO\n"); - return ret; - } - - ret = gpio_direction_output(OMAP3_PANDORA_DAC_POWER_GPIO, 0); - if (ret) { - pr_err(PREFIX "Failed to set DAC power GPIO direction\n"); - goto fail0; - } - - ret = gpio_request(OMAP3_PANDORA_AMP_POWER_GPIO, "amp_power"); - if (ret) { - pr_err(PREFIX "Failed to get amp power GPIO\n"); - goto fail0; - } - - ret = gpio_direction_output(OMAP3_PANDORA_AMP_POWER_GPIO, 0); - if (ret) { - pr_err(PREFIX "Failed to set amp power GPIO direction\n"); - goto fail1; - } - omap3pandora_snd_device = platform_device_alloc("soc-audio", -1); if (omap3pandora_snd_device == NULL) { pr_err(PREFIX "Platform device allocation failed\n"); - ret = -ENOMEM; - goto fail1; + return -ENOMEM; } platform_set_drvdata(omap3pandora_snd_device, &snd_soc_card_omap3pandora); @@ -268,6 +242,20 @@ static int __init omap3pandora_soc_init(void) goto fail2; } + dac_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, + "dac", GPIOD_OUT_LOW); + if (IS_ERR(dac_power_gpio)) { + ret = PTR_ERR(dac_power_gpio); + goto fail3; + } + + amp_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, + "amp", GPIOD_OUT_LOW); + if (IS_ERR(amp_power_gpio)) { + ret = PTR_ERR(amp_power_gpio); + goto fail3; + } + omap3pandora_dac_reg = regulator_get(&omap3pandora_snd_device->dev, "vcc"); if (IS_ERR(omap3pandora_dac_reg)) { pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", @@ -283,10 +271,7 @@ fail3: platform_device_del(omap3pandora_snd_device); fail2: platform_device_put(omap3pandora_snd_device); -fail1: - gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); -fail0: - gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); + return ret; } module_init(omap3pandora_soc_init); @@ -295,8 +280,6 @@ static void __exit omap3pandora_soc_exit(void) { regulator_put(omap3pandora_dac_reg); platform_device_unregister(omap3pandora_snd_device); - gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); - gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); } module_exit(omap3pandora_soc_exit); From b928cde9c07cf031a9a5bffbfb94048f340d34ee Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:01:57 +0930 Subject: [PATCH 361/553] ALSA: scarlett2: Default mixer driver to enabled [ Upstream commit bc83058f598757a908b30f8f536338cb1478ab5b ] Early versions of this mixer driver did not work on all hardware, so out of caution the driver was disabled by default and had to be explicitly enabled with device_setup=1. Since commit 764fa6e686e0 ("ALSA: usb-audio: scarlett2: Fix device hang with ehci-pci") no more problems of this nature have been reported. Therefore, enable the driver by default but provide a new device_setup option to disable the driver in case that is needed. - device_setup value of 0 now means "enable" rather than "disable". - device_setup value of 1 is now ignored. - device_setup value of 4 now means "disable". Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/89600a35b40307f2766578ad1ca2f21801286b58.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Stable-dep-of: b61a3acada00 ("ALSA: scarlett2: Add Focusrite Clarett+ 2Pre and 4Pre support") Signed-off-by: Sasha Levin --- sound/usb/mixer_scarlett_gen2.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index f949d22da382..2bd46fe91394 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -145,12 +145,12 @@ #include "mixer_scarlett_gen2.h" -/* device_setup value to enable */ -#define SCARLETT2_ENABLE 0x01 - /* device_setup value to allow turning MSD mode back on */ #define SCARLETT2_MSD_ENABLE 0x02 +/* device_setup value to disable this mixer driver */ +#define SCARLETT2_DISABLE 0x04 + /* some gui mixers can't handle negative ctl values */ #define SCARLETT2_VOLUME_BIAS 127 @@ -4237,19 +4237,20 @@ int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) if (!mixer->protocol) return 0; - if (!(chip->setup & SCARLETT2_ENABLE)) { + if (chip->setup & SCARLETT2_DISABLE) { usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver disabled; " - "use options snd_usb_audio vid=0x%04x pid=0x%04x " - "device_setup=1 to enable and report any issues " - "to g@b4.vu", + "Focusrite Scarlett Gen 2/3 Mixer Driver disabled " + "by modprobe options (snd_usb_audio " + "vid=0x%04x pid=0x%04x device_setup=%d)\n", USB_ID_VENDOR(chip->usb_id), - USB_ID_PRODUCT(chip->usb_id)); + USB_ID_PRODUCT(chip->usb_id), + SCARLETT2_DISABLE); return 0; } usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver enabled pid=0x%04x", + "Focusrite Scarlett Gen 2/3 Mixer Driver enabled (pid=0x%04x); " + "report any issues to g@b4.vu", USB_ID_PRODUCT(chip->usb_id)); err = snd_scarlett_gen2_controls_create(mixer); From 7c02a4a6ccea9a2ab98a07ab4a5f9743877deccd Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 15 Sep 2023 03:03:03 +0930 Subject: [PATCH 362/553] ALSA: scarlett2: Add correct product series name to messages [ Upstream commit 6e743781d62e28f5fa095e5f31f878819622c143 ] This driver was originally developed for the Focusrite Scarlett Gen 2 series, but now also supports the Scarlett Gen 3 series, the Clarett 8Pre USB, and the Clarett+ 8Pre. The messages output by the driver on initialisation and error include the identifying text "Scarlett Gen 2/3", but this is no longer accurate, and writing "Scarlett Gen 2/3/Clarett USB/Clarett+" would be unwieldy. Add series_name field to the scarlett2_device_entry struct so that concise and accurate messages can be output. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/3774b9d35bf1fbdd6fdad9f3f4f97e9b82ac76bf.1694705811.git.g@b4.vu Signed-off-by: Takashi Iwai Stable-dep-of: b61a3acada00 ("ALSA: scarlett2: Add Focusrite Clarett+ 2Pre and 4Pre support") Signed-off-by: Sasha Levin --- sound/usb/mixer_scarlett_gen2.c | 81 ++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 2bd46fe91394..328a593aceaa 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -391,6 +391,7 @@ struct scarlett2_data { struct mutex data_mutex; /* lock access to this data */ struct delayed_work work; const struct scarlett2_device_info *info; + const char *series_name; __u8 bInterfaceNumber; __u8 bEndpointAddress; __u16 wMaxPacketSize; @@ -887,25 +888,26 @@ static const struct scarlett2_device_info clarett_8pre_info = { struct scarlett2_device_entry { const u32 usb_id; /* USB device identifier */ const struct scarlett2_device_info *info; + const char *series_name; }; static const struct scarlett2_device_entry scarlett2_devices[] = { /* Supported Gen 2 devices */ - { USB_ID(0x1235, 0x8203), &s6i6_gen2_info }, - { USB_ID(0x1235, 0x8204), &s18i8_gen2_info }, - { USB_ID(0x1235, 0x8201), &s18i20_gen2_info }, + { USB_ID(0x1235, 0x8203), &s6i6_gen2_info, "Scarlett Gen 2" }, + { USB_ID(0x1235, 0x8204), &s18i8_gen2_info, "Scarlett Gen 2" }, + { USB_ID(0x1235, 0x8201), &s18i20_gen2_info, "Scarlett Gen 2" }, /* Supported Gen 3 devices */ - { USB_ID(0x1235, 0x8211), &solo_gen3_info }, - { USB_ID(0x1235, 0x8210), &s2i2_gen3_info }, - { USB_ID(0x1235, 0x8212), &s4i4_gen3_info }, - { USB_ID(0x1235, 0x8213), &s8i6_gen3_info }, - { USB_ID(0x1235, 0x8214), &s18i8_gen3_info }, - { USB_ID(0x1235, 0x8215), &s18i20_gen3_info }, + { USB_ID(0x1235, 0x8211), &solo_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8210), &s2i2_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8212), &s4i4_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8213), &s8i6_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8214), &s18i8_gen3_info, "Scarlett Gen 3" }, + { USB_ID(0x1235, 0x8215), &s18i20_gen3_info, "Scarlett Gen 3" }, /* Supported Clarett USB/Clarett+ devices */ - { USB_ID(0x1235, 0x8208), &clarett_8pre_info }, - { USB_ID(0x1235, 0x820c), &clarett_8pre_info }, + { USB_ID(0x1235, 0x8208), &clarett_8pre_info, "Clarett USB" }, + { USB_ID(0x1235, 0x820c), &clarett_8pre_info, "Clarett+" }, /* End of list */ { 0, NULL }, @@ -1205,8 +1207,8 @@ static int scarlett2_usb( if (err != req_buf_size) { usb_audio_err( mixer->chip, - "Scarlett Gen 2/3 USB request result cmd %x was %d\n", - cmd, err); + "%s USB request result cmd %x was %d\n", + private->series_name, cmd, err); err = -EINVAL; goto unlock; } @@ -1222,9 +1224,8 @@ static int scarlett2_usb( if (err != resp_buf_size) { usb_audio_err( mixer->chip, - "Scarlett Gen 2/3 USB response result cmd %x was %d " - "expected %zu\n", - cmd, err, resp_buf_size); + "%s USB response result cmd %x was %d expected %zu\n", + private->series_name, cmd, err, resp_buf_size); err = -EINVAL; goto unlock; } @@ -1240,9 +1241,10 @@ static int scarlett2_usb( resp->pad) { usb_audio_err( mixer->chip, - "Scarlett Gen 2/3 USB invalid response; " + "%s USB invalid response; " "cmd tx/rx %d/%d seq %d/%d size %d/%d " "error %d pad %d\n", + private->series_name, le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd), le16_to_cpu(req->seq), le16_to_cpu(resp->seq), resp_size, le16_to_cpu(resp->size), @@ -3798,7 +3800,7 @@ static int scarlett2_find_fc_interface(struct usb_device *dev, /* Initialise private data */ static int scarlett2_init_private(struct usb_mixer_interface *mixer, - const struct scarlett2_device_info *info) + const struct scarlett2_device_entry *entry) { struct scarlett2_data *private = kzalloc(sizeof(struct scarlett2_data), GFP_KERNEL); @@ -3814,7 +3816,8 @@ static int scarlett2_init_private(struct usb_mixer_interface *mixer, mixer->private_free = scarlett2_private_free; mixer->private_suspend = scarlett2_private_suspend; - private->info = info; + private->info = entry->info; + private->series_name = entry->series_name; scarlett2_count_mux_io(private); private->scarlett2_seq = 0; private->mixer = mixer; @@ -4135,19 +4138,28 @@ static int scarlett2_init_notify(struct usb_mixer_interface *mixer) return usb_submit_urb(mixer->urb, GFP_KERNEL); } -static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) +static const struct scarlett2_device_entry *get_scarlett2_device_entry( + struct usb_mixer_interface *mixer) { const struct scarlett2_device_entry *entry = scarlett2_devices; - int err; /* Find entry in scarlett2_devices */ while (entry->usb_id && entry->usb_id != mixer->chip->usb_id) entry++; if (!entry->usb_id) - return -EINVAL; + return NULL; + + return entry; +} + +static int snd_scarlett_gen2_controls_create( + struct usb_mixer_interface *mixer, + const struct scarlett2_device_entry *entry) +{ + int err; /* Initialise private data */ - err = scarlett2_init_private(mixer, entry->info); + err = scarlett2_init_private(mixer, entry); if (err < 0) return err; @@ -4231,17 +4243,30 @@ static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer) int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) { struct snd_usb_audio *chip = mixer->chip; + const struct scarlett2_device_entry *entry; int err; /* only use UAC_VERSION_2 */ if (!mixer->protocol) return 0; + /* find entry in scarlett2_devices */ + entry = get_scarlett2_device_entry(mixer); + if (!entry) { + usb_audio_err(mixer->chip, + "%s: missing device entry for %04x:%04x\n", + __func__, + USB_ID_VENDOR(chip->usb_id), + USB_ID_PRODUCT(chip->usb_id)); + return 0; + } + if (chip->setup & SCARLETT2_DISABLE) { usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver disabled " + "Focusrite %s Mixer Driver disabled " "by modprobe options (snd_usb_audio " "vid=0x%04x pid=0x%04x device_setup=%d)\n", + entry->series_name, USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id), SCARLETT2_DISABLE); @@ -4249,14 +4274,16 @@ int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) } usb_audio_info(chip, - "Focusrite Scarlett Gen 2/3 Mixer Driver enabled (pid=0x%04x); " + "Focusrite %s Mixer Driver enabled (pid=0x%04x); " "report any issues to g@b4.vu", + entry->series_name, USB_ID_PRODUCT(chip->usb_id)); - err = snd_scarlett_gen2_controls_create(mixer); + err = snd_scarlett_gen2_controls_create(mixer, entry); if (err < 0) usb_audio_err(mixer->chip, - "Error initialising Scarlett Mixer Driver: %d", + "Error initialising %s Mixer Driver: %d", + entry->series_name, err); return err; From 03bfe0e93613fbd9039f16b313b918aeb395d704 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 27 Sep 2023 01:11:30 +0930 Subject: [PATCH 363/553] ALSA: scarlett2: Add Focusrite Clarett+ 2Pre and 4Pre support [ Upstream commit b61a3acada0031e7a4922d1340b4296ab95c260b ] The Focusrite Clarett+ series uses the same protocol as the Scarlett Gen 2 and Gen 3 series. This patch adds support for the Clarett+ 2Pre and Clarett+ 4Pre similarly to the existing 8Pre support by adding appropriate entries to the scarlett2 driver. The Clarett 2Pre USB and 4Pre USB presumably use the same protocol as well, so support for them can easily be added if someone can test. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/ZRL7qjC3tYQllT3H@m.b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 2 + sound/usb/mixer_scarlett_gen2.c | 97 ++++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index b122d7aedb44..3721d59a5680 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3448,6 +3448,8 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8214): /* Focusrite Scarlett 18i8 3rd Gen */ case USB_ID(0x1235, 0x8215): /* Focusrite Scarlett 18i20 3rd Gen */ case USB_ID(0x1235, 0x8208): /* Focusrite Clarett 8Pre USB */ + case USB_ID(0x1235, 0x820a): /* Focusrite Clarett+ 2Pre */ + case USB_ID(0x1235, 0x820b): /* Focusrite Clarett+ 4Pre */ case USB_ID(0x1235, 0x820c): /* Focusrite Clarett+ 8Pre */ err = snd_scarlett_gen2_init(mixer); break; diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 328a593aceaa..e6088fdafe7a 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -6,7 +6,7 @@ * - 6i6/18i8/18i20 Gen 2 * - Solo/2i2/4i4/8i6/18i8/18i20 Gen 3 * - Clarett 8Pre USB - * - Clarett+ 8Pre + * - Clarett+ 2Pre/4Pre/8Pre * * Copyright (c) 2018-2023 by Geoffrey D. Bennett * Copyright (c) 2020-2021 by Vladimir Sadovnikov @@ -60,6 +60,10 @@ * Support for Clarett 8Pre USB added in Sep 2023 (thanks to Philippe * Perrot for confirmation). * + * Support for Clarett+ 4Pre and 2Pre added in Sep 2023 (thanks to + * Gregory Rozzo for donating a 4Pre, and David Sherwood and Patrice + * Peterson for usbmon output). + * * This ALSA mixer gives access to (model-dependent): * - input, output, mixer-matrix muxes * - mixer-matrix gain stages @@ -832,6 +836,95 @@ static const struct scarlett2_device_info s18i20_gen3_info = { } }, }; +static const struct scarlett2_device_info clarett_2pre_info = { + .config_set = SCARLETT2_CONFIG_SET_CLARETT, + .line_out_hw_vol = 1, + .level_input_count = 2, + .air_input_count = 2, + + .line_out_descrs = { + "Monitor L", + "Monitor R", + "Headphones L", + "Headphones R", + }, + + .port_count = { + [SCARLETT2_PORT_TYPE_NONE] = { 1, 0 }, + [SCARLETT2_PORT_TYPE_ANALOGUE] = { 2, 4 }, + [SCARLETT2_PORT_TYPE_SPDIF] = { 2, 0 }, + [SCARLETT2_PORT_TYPE_ADAT] = { 8, 0 }, + [SCARLETT2_PORT_TYPE_MIX] = { 10, 18 }, + [SCARLETT2_PORT_TYPE_PCM] = { 4, 12 }, + }, + + .mux_assignment = { { + { SCARLETT2_PORT_TYPE_PCM, 0, 12 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 4 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 8 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 4 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 2 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 4 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 26 }, + { 0, 0, 0 }, + } }, +}; + +static const struct scarlett2_device_info clarett_4pre_info = { + .config_set = SCARLETT2_CONFIG_SET_CLARETT, + .line_out_hw_vol = 1, + .level_input_count = 2, + .air_input_count = 4, + + .line_out_descrs = { + "Monitor L", + "Monitor R", + "Headphones 1 L", + "Headphones 1 R", + "Headphones 2 L", + "Headphones 2 R", + }, + + .port_count = { + [SCARLETT2_PORT_TYPE_NONE] = { 1, 0 }, + [SCARLETT2_PORT_TYPE_ANALOGUE] = { 8, 6 }, + [SCARLETT2_PORT_TYPE_SPDIF] = { 2, 2 }, + [SCARLETT2_PORT_TYPE_ADAT] = { 8, 0 }, + [SCARLETT2_PORT_TYPE_MIX] = { 10, 18 }, + [SCARLETT2_PORT_TYPE_PCM] = { 8, 18 }, + }, + + .mux_assignment = { { + { SCARLETT2_PORT_TYPE_PCM, 0, 18 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 6 }, + { SCARLETT2_PORT_TYPE_SPDIF, 0, 2 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 14 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 6 }, + { SCARLETT2_PORT_TYPE_SPDIF, 0, 2 }, + { SCARLETT2_PORT_TYPE_MIX, 0, 18 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 8 }, + { 0, 0, 0 }, + }, { + { SCARLETT2_PORT_TYPE_PCM, 0, 12 }, + { SCARLETT2_PORT_TYPE_ANALOGUE, 0, 6 }, + { SCARLETT2_PORT_TYPE_SPDIF, 0, 2 }, + { SCARLETT2_PORT_TYPE_NONE, 0, 24 }, + { 0, 0, 0 }, + } }, +}; + static const struct scarlett2_device_info clarett_8pre_info = { .config_set = SCARLETT2_CONFIG_SET_CLARETT, .line_out_hw_vol = 1, @@ -907,6 +1000,8 @@ static const struct scarlett2_device_entry scarlett2_devices[] = { /* Supported Clarett USB/Clarett+ devices */ { USB_ID(0x1235, 0x8208), &clarett_8pre_info, "Clarett USB" }, + { USB_ID(0x1235, 0x820a), &clarett_2pre_info, "Clarett+" }, + { USB_ID(0x1235, 0x820b), &clarett_4pre_info, "Clarett+" }, { USB_ID(0x1235, 0x820c), &clarett_8pre_info, "Clarett+" }, /* End of list */ From aedbd0961467843ff4276db5f484acd78ff54974 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sat, 7 Oct 2023 22:03:04 +1030 Subject: [PATCH 364/553] ALSA: scarlett2: Add Focusrite Clarett 2Pre and 4Pre USB support [ Upstream commit 2b17b489e47a956c8e93c8f1bcabb0343c851d90 ] It has been confirmed that all devices in the Focusrite Clarett USB series work the same as the devices in the Clarett+ series. Add the missing PIDs to enable support for the Clarett 2Pre and 4Pre USB. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/ZSFB8EVTG1PK1eq/@m.b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 2 ++ sound/usb/mixer_scarlett_gen2.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 3721d59a5680..a331732fed89 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3447,6 +3447,8 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x8213): /* Focusrite Scarlett 8i6 3rd Gen */ case USB_ID(0x1235, 0x8214): /* Focusrite Scarlett 18i8 3rd Gen */ case USB_ID(0x1235, 0x8215): /* Focusrite Scarlett 18i20 3rd Gen */ + case USB_ID(0x1235, 0x8206): /* Focusrite Clarett 2Pre USB */ + case USB_ID(0x1235, 0x8207): /* Focusrite Clarett 4Pre USB */ case USB_ID(0x1235, 0x8208): /* Focusrite Clarett 8Pre USB */ case USB_ID(0x1235, 0x820a): /* Focusrite Clarett+ 2Pre */ case USB_ID(0x1235, 0x820b): /* Focusrite Clarett+ 4Pre */ diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index e6088fdafe7a..cbdef89ab987 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -5,7 +5,7 @@ * Supported models: * - 6i6/18i8/18i20 Gen 2 * - Solo/2i2/4i4/8i6/18i8/18i20 Gen 3 - * - Clarett 8Pre USB + * - Clarett 2Pre/4Pre/8Pre USB * - Clarett+ 2Pre/4Pre/8Pre * * Copyright (c) 2018-2023 by Geoffrey D. Bennett @@ -64,6 +64,8 @@ * Gregory Rozzo for donating a 4Pre, and David Sherwood and Patrice * Peterson for usbmon output). * + * Support for Clarett 2Pre and 4Pre USB added in Oct 2023. + * * This ALSA mixer gives access to (model-dependent): * - input, output, mixer-matrix muxes * - mixer-matrix gain stages @@ -999,6 +1001,8 @@ static const struct scarlett2_device_entry scarlett2_devices[] = { { USB_ID(0x1235, 0x8215), &s18i20_gen3_info, "Scarlett Gen 3" }, /* Supported Clarett USB/Clarett+ devices */ + { USB_ID(0x1235, 0x8206), &clarett_2pre_info, "Clarett USB" }, + { USB_ID(0x1235, 0x8207), &clarett_4pre_info, "Clarett USB" }, { USB_ID(0x1235, 0x8208), &clarett_8pre_info, "Clarett USB" }, { USB_ID(0x1235, 0x820a), &clarett_2pre_info, "Clarett+" }, { USB_ID(0x1235, 0x820b), &clarett_4pre_info, "Clarett+" }, @@ -1197,7 +1201,7 @@ static const struct scarlett2_config [SCARLETT2_CONFIG_TALKBACK_MAP] = { .offset = 0xb0, .size = 16, .activate = 10 }, -/* Clarett+ 8Pre */ +/* Clarett USB and Clarett+ devices: 2Pre, 4Pre, 8Pre */ }, { [SCARLETT2_CONFIG_DIM_MUTE] = { .offset = 0x31, .size = 8, .activate = 2 }, From 636f0fdb3623848deb5b738a78e56e7cf0ec309f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 18 Oct 2023 14:32:51 +0300 Subject: [PATCH 365/553] PCI/DPC: Use FIELD_GET() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9a9eec4765737b9b2a8d6ae03de6480a5f12dd5c ] Use FIELD_GET() to remove dependencies on the field position, i.e., the shift value. No functional change intended. Link: https://lore.kernel.org/r/20231018113254.17616-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/dpc.c | 5 +++-- drivers/pci/quirks.c | 2 +- include/uapi/linux/pci_regs.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 08800282825e..acdbf9e770a8 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -9,6 +9,7 @@ #define dev_fmt(fmt) "DPC: " fmt #include +#include #include #include #include @@ -203,7 +204,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) /* Get First Error Pointer */ pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status); - first_error = (dpc_status & 0x1f00) >> 8; + first_error = FIELD_GET(PCI_EXP_DPC_RP_PIO_FEP, dpc_status); for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) { if ((status & ~mask) & (1 << i)) @@ -339,7 +340,7 @@ void pci_dpc_init(struct pci_dev *pdev) /* Quirks may set dpc_rp_log_size if device or firmware is buggy */ if (!pdev->dpc_rp_log_size) { pdev->dpc_rp_log_size = - (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap); if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { pci_err(pdev, "RP PIO log size %u is invalid\n", pdev->dpc_rp_log_size); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 47099d00fcf1..1131e353d8c9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6103,7 +6103,7 @@ static void dpc_log_size(struct pci_dev *dev) if (!(val & PCI_EXP_DPC_CAP_RP_EXT)) return; - if (!((val & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8)) { + if (FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, val) == 0) { pci_info(dev, "Overriding RP PIO Log Size to 4\n"); dev->dpc_rp_log_size = 4; } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 57b8e2ffb1dd..3325155036c8 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1043,6 +1043,7 @@ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ +#define PCI_EXP_DPC_RP_PIO_FEP 0x1f00 /* RP PIO First Err Ptr */ #define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ From 39f932d2953bdf17361b64cdfc0c05c0b2487eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 26 Oct 2023 15:19:23 +0300 Subject: [PATCH 366/553] PCI: Simplify pcie_capability_clear_and_set_word() to ..._clear_word() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0fce6e5c87faec2c8bf28d2abc8cb595f4e244b6 ] When using pcie_capability_clear_and_set_word() but not actually *setting* anything, use pcie_capability_clear_word() instead. Link: https://lore.kernel.org/r/20231026121924.2164-1-ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/r/20231026121924.2164-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: squash] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 8 ++++---- drivers/pci/quirks.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 25736d408e88..2a3d973658da 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -743,10 +743,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) * in pcie_config_aspm_link(). */ if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) { - pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1, 0); - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1, 0); + pcie_capability_clear_word(child, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1); + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPM_L1); } val = 0; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1131e353d8c9..56dce858a693 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4476,9 +4476,9 @@ static void quirk_disable_root_port_attributes(struct pci_dev *pdev) pci_info(root_port, "Disabling No Snoop/Relaxed Ordering Attributes to avoid PCIe Completion erratum in %s\n", dev_name(&pdev->dev)); - pcie_capability_clear_and_set_word(root_port, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_RELAX_EN | - PCI_EXP_DEVCTL_NOSNOOP_EN, 0); + pcie_capability_clear_word(root_port, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN | + PCI_EXP_DEVCTL_NOSNOOP_EN); } /* From 7989b04d6cc4f1d83c696d30e8c9d27a4928e28c Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 27 Oct 2023 04:31:28 +1030 Subject: [PATCH 367/553] ALSA: scarlett2: Rename scarlett_gen2 to scarlett2 [ Upstream commit efc3d7d20361cc59325a9f0525e079333b4459c0 ] This driver was originally developed for the Focusrite Scarlett Gen 2 series. Since then Focusrite have used a similar protocol for their Gen 3, Gen 4, Clarett USB, Clarett+, and Vocaster series. Let's call this common protocol the "Scarlett 2 Protocol" and rename the driver to scarlett2 to not imply that it is restricted to Gen 2 series devices. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/e1ad7f69a1e20cdb39094164504389160c1a0a0b.1698342632.git.g@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- MAINTAINERS | 2 +- sound/usb/Makefile | 2 +- sound/usb/mixer_quirks.c | 4 ++-- .../usb/{mixer_scarlett_gen2.c => mixer_scarlett2.c} | 12 +++++++----- sound/usb/mixer_scarlett2.h | 7 +++++++ sound/usb/mixer_scarlett_gen2.h | 7 ------- 6 files changed, 18 insertions(+), 16 deletions(-) rename sound/usb/{mixer_scarlett_gen2.c => mixer_scarlett2.c} (99%) create mode 100644 sound/usb/mixer_scarlett2.h delete mode 100644 sound/usb/mixer_scarlett_gen2.h diff --git a/MAINTAINERS b/MAINTAINERS index bbfedb0b2093..ecf4d0c8f446 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8031,7 +8031,7 @@ M: Geoffrey D. Bennett L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git -F: sound/usb/mixer_scarlett_gen2.c +F: sound/usb/mixer_scarlett2.c FORCEDETH GIGABIT ETHERNET DRIVER M: Rain River diff --git a/sound/usb/Makefile b/sound/usb/Makefile index 9ccb21a4ff8a..64a718c766a7 100644 --- a/sound/usb/Makefile +++ b/sound/usb/Makefile @@ -12,7 +12,7 @@ snd-usb-audio-objs := card.o \ mixer.o \ mixer_quirks.o \ mixer_scarlett.o \ - mixer_scarlett_gen2.o \ + mixer_scarlett2.o \ mixer_us16x08.o \ mixer_s1810c.o \ pcm.o \ diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index a331732fed89..c8d48566e175 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -33,7 +33,7 @@ #include "mixer.h" #include "mixer_quirks.h" #include "mixer_scarlett.h" -#include "mixer_scarlett_gen2.h" +#include "mixer_scarlett2.h" #include "mixer_us16x08.h" #include "mixer_s1810c.h" #include "helper.h" @@ -3453,7 +3453,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x820a): /* Focusrite Clarett+ 2Pre */ case USB_ID(0x1235, 0x820b): /* Focusrite Clarett+ 4Pre */ case USB_ID(0x1235, 0x820c): /* Focusrite Clarett+ 8Pre */ - err = snd_scarlett_gen2_init(mixer); + err = snd_scarlett2_init(mixer); break; case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */ diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett2.c similarity index 99% rename from sound/usb/mixer_scarlett_gen2.c rename to sound/usb/mixer_scarlett2.c index cbdef89ab987..bcb8b7617406 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett2.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Focusrite Scarlett Gen 2/3 and Clarett USB/Clarett+ Driver for ALSA + * Focusrite Scarlett 2 Protocol Driver for ALSA + * (including Scarlett 2nd Gen, 3rd Gen, Clarett USB, and Clarett+ + * series products) * * Supported models: * - 6i6/18i8/18i20 Gen 2 @@ -149,7 +151,7 @@ #include "mixer.h" #include "helper.h" -#include "mixer_scarlett_gen2.h" +#include "mixer_scarlett2.h" /* device_setup value to allow turning MSD mode back on */ #define SCARLETT2_MSD_ENABLE 0x02 @@ -4251,7 +4253,7 @@ static const struct scarlett2_device_entry *get_scarlett2_device_entry( return entry; } -static int snd_scarlett_gen2_controls_create( +static int snd_scarlett2_controls_create( struct usb_mixer_interface *mixer, const struct scarlett2_device_entry *entry) { @@ -4339,7 +4341,7 @@ static int snd_scarlett_gen2_controls_create( return 0; } -int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) +int snd_scarlett2_init(struct usb_mixer_interface *mixer) { struct snd_usb_audio *chip = mixer->chip; const struct scarlett2_device_entry *entry; @@ -4378,7 +4380,7 @@ int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer) entry->series_name, USB_ID_PRODUCT(chip->usb_id)); - err = snd_scarlett_gen2_controls_create(mixer, entry); + err = snd_scarlett2_controls_create(mixer, entry); if (err < 0) usb_audio_err(mixer->chip, "Error initialising %s Mixer Driver: %d", diff --git a/sound/usb/mixer_scarlett2.h b/sound/usb/mixer_scarlett2.h new file mode 100644 index 000000000000..d209362cf41a --- /dev/null +++ b/sound/usb/mixer_scarlett2.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __USB_MIXER_SCARLETT2_H +#define __USB_MIXER_SCARLETT2_H + +int snd_scarlett2_init(struct usb_mixer_interface *mixer); + +#endif /* __USB_MIXER_SCARLETT2_H */ diff --git a/sound/usb/mixer_scarlett_gen2.h b/sound/usb/mixer_scarlett_gen2.h deleted file mode 100644 index 668c6b0cb50a..000000000000 --- a/sound/usb/mixer_scarlett_gen2.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __USB_MIXER_SCARLETT_GEN2_H -#define __USB_MIXER_SCARLETT_GEN2_H - -int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer); - -#endif /* __USB_MIXER_SCARLETT_GEN2_H */ From 1d011d972f588f94e29773791abde6da3fa423be Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 14 Nov 2023 23:38:59 +0000 Subject: [PATCH 368/553] drm: panel-orientation-quirks: Add quirk for Lenovo Legion Go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 430143b0d3611f4a9c8434319e5e504244749e79 ] The Legion Go has a 2560x1600 portrait screen, with the native "up" facing the right controller (90° CW from the rest of the device). Signed-off-by: Brenton Simpson Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231114233859.274189-1-appsforartists@google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3fe5e6439c40..aa93129c3397 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -348,6 +348,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Legion Go 8APU1 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, }, { /* Lenovo Yoga Book X90F / X90L */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), From 5f9b63193bcac6938298ec8f079e533da6db5e86 Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Fri, 27 Oct 2023 17:20:28 +0200 Subject: [PATCH 369/553] usb: xhci: Add timeout argument in address_device USB HCD callback [ Upstream commit a769154c7cac037914ba375ae88aae55b2c853e0 ] - The HCD address_device callback now accepts a user-defined timeout value in milliseconds, providing better control over command execution times. - The default timeout value for the address_device command has been set to 5000 ms, aligning with the USB 3.2 specification. However, this timeout can be adjusted as needed. - The xhci_setup_device function has been updated to accept the timeout value, allowing it to specify the maximum wait time for the command operation to complete. - The hub driver has also been updated to accommodate the newly added timeout parameter during the SET_ADDRESS request. Signed-off-by: Hardik Gajjar Reviewed-by: Mathias Nyman Link: https://lore.kernel.org/r/20231027152029.104363-1-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 5a1ccf0c72cf ("usb: new quirk to reduce the SET_ADDRESS request timeout") Signed-off-by: Sasha Levin --- drivers/usb/core/hub.c | 2 +- drivers/usb/host/xhci-mem.c | 2 ++ drivers/usb/host/xhci-ring.c | 11 ++++++----- drivers/usb/host/xhci.c | 23 ++++++++++++++++------- drivers/usb/host/xhci.h | 9 +++++++-- include/linux/usb/hcd.h | 5 +++-- 6 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b1fb04e5247c..a661f6ac1ad1 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4661,7 +4661,7 @@ static int hub_set_address(struct usb_device *udev, int devnum) if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) - retval = hcd->driver->address_device(hcd, udev); + retval = hcd->driver->address_device(hcd, udev, USB_CTRL_SET_TIMEOUT); else retval = usb_control_msg(udev, usb_sndaddr0pipe(), USB_REQ_SET_ADDRESS, 0, devnum, 0, diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 019dcbe55dbd..62808c98713e 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1752,6 +1752,8 @@ struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci, } command->status = 0; + /* set default timeout to 5000 ms */ + command->timeout_ms = XHCI_CMD_DEFAULT_TIMEOUT; INIT_LIST_HEAD(&command->cmd_list); return command; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 239b5edee326..4a039e42694b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -332,9 +332,10 @@ void xhci_ring_cmd_db(struct xhci_hcd *xhci) readl(&xhci->dba->doorbell[0]); } -static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) +static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci) { - return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); + return mod_delayed_work(system_wq, &xhci->cmd_timer, + msecs_to_jiffies(xhci->current_cmd->timeout_ms)); } static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci) @@ -378,7 +379,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && !(xhci->xhc_state & XHCI_STATE_DYING)) { xhci->current_cmd = cur_cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci); xhci_ring_cmd_db(xhci); } } @@ -1762,7 +1763,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, if (!list_is_singular(&xhci->cmd_list)) { xhci->current_cmd = list_first_entry(&cmd->cmd_list, struct xhci_command, cmd_list); - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci); } else if (xhci->current_cmd == cmd) { xhci->current_cmd = NULL; } @@ -4339,7 +4340,7 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, /* if there are no other commands queued we start the timeout timer */ if (list_empty(&xhci->cmd_list)) { xhci->current_cmd = cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci); } list_add_tail(&cmd->cmd_list, &xhci->cmd_list); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 565aba6b9986..27e01671d386 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4170,12 +4170,18 @@ disable_slot: return 0; } -/* - * Issue an Address Device command and optionally send a corresponding - * SetAddress request to the device. +/** + * xhci_setup_device - issues an Address Device command to assign a unique + * USB bus address. + * @hcd: USB host controller data structure. + * @udev: USB dev structure representing the connected device. + * @setup: Enum specifying setup mode: address only or with context. + * @timeout_ms: Max wait time (ms) for the command operation to complete. + * + * Return: 0 if successful; otherwise, negative error code. */ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, - enum xhci_setup_dev setup) + enum xhci_setup_dev setup, unsigned int timeout_ms) { const char *act = setup == SETUP_CONTEXT_ONLY ? "context" : "address"; unsigned long flags; @@ -4232,6 +4238,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, } command->in_ctx = virt_dev->in_ctx; + command->timeout_ms = timeout_ms; slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->in_ctx); ctrl_ctx = xhci_get_input_control_ctx(virt_dev->in_ctx); @@ -4358,14 +4365,16 @@ out: return ret; } -static int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev, + unsigned int timeout_ms) { - return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS); + return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS, timeout_ms); } static int xhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) { - return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY); + return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY, + XHCI_CMD_DEFAULT_TIMEOUT); } /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index fc25a5b09710..fa9e87141e0b 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -815,6 +815,8 @@ struct xhci_command { struct completion *completion; union xhci_trb *command_trb; struct list_head cmd_list; + /* xHCI command response timeout in milliseconds */ + unsigned int timeout_ms; }; /* drop context bitmasks */ @@ -1574,8 +1576,11 @@ struct xhci_td { unsigned int num_trbs; }; -/* xHCI command default timeout value */ -#define XHCI_CMD_DEFAULT_TIMEOUT (5 * HZ) +/* + * xHCI command default timeout value in milliseconds. + * USB 3.2 spec, section 9.2.6.1 + */ +#define XHCI_CMD_DEFAULT_TIMEOUT 5000 /* command descriptor */ struct xhci_cd { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 5a89928ea953..cd667acf6267 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -371,8 +371,9 @@ struct hc_driver { * or bandwidth constraints. */ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); - /* Returns the hardware-chosen device address */ - int (*address_device)(struct usb_hcd *, struct usb_device *udev); + /* Set the hardware-chosen device address */ + int (*address_device)(struct usb_hcd *, struct usb_device *udev, + unsigned int timeout_ms); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. From f6ac4fdfa519ba66a7beb1ba4077f1a7319aecea Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Fri, 27 Oct 2023 17:20:29 +0200 Subject: [PATCH 370/553] usb: new quirk to reduce the SET_ADDRESS request timeout [ Upstream commit 5a1ccf0c72cf917ff3ccc131d1bb8d19338ffe52 ] This patch introduces a new USB quirk, USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT, which modifies the timeout value for the SET_ADDRESS request. The standard timeout for USB request/command is 5000 ms, as recommended in the USB 3.2 specification (section 9.2.6.1). However, certain scenarios, such as connecting devices through an APTIV hub, can lead to timeout errors when the device enumerates as full speed initially and later switches to high speed during chirp negotiation. In such cases, USB analyzer logs reveal that the bus suspends for 5 seconds due to incorrect chirp parsing and resumes only after two consecutive timeout errors trigger a hub driver reset. Packet(54) Dir(?) Full Speed J(997.100 us) Idle( 2.850 us) _______| Time Stamp(28 . 105 910 682) _______|_____________________________________________________________Ch0 Packet(55) Dir(?) Full Speed J(997.118 us) Idle( 2.850 us) _______| Time Stamp(28 . 106 910 632) _______|_____________________________________________________________Ch0 Packet(56) Dir(?) Full Speed J(399.650 us) Idle(222.582 us) _______| Time Stamp(28 . 107 910 600) _______|_____________________________________________________________Ch0 Packet(57) Dir Chirp J( 23.955 ms) Idle(115.169 ms) _______| Time Stamp(28 . 108 532 832) _______|_____________________________________________________________Ch0 Packet(58) Dir(?) Full Speed J (Suspend)( 5.347 sec) Idle( 5.366 us) _______| Time Stamp(28 . 247 657 600) _______|_____________________________________________________________Ch0 This 5-second delay in device enumeration is undesirable, particularly in automotive applications where quick enumeration is crucial (ideally within 3 seconds). The newly introduced quirks provide the flexibility to align with a 3-second time limit, as required in specific contexts like automotive applications. By reducing the SET_ADDRESS request timeout to 500 ms, the system can respond more swiftly to errors, initiate rapid recovery, and ensure efficient device enumeration. This change is vital for scenarios where rapid smartphone enumeration and screen projection are essential. To use the quirk, please write "vendor_id:product_id:p" to /sys/bus/usb/drivers/hub/module/parameter/quirks For example, echo "0x2c48:0x0132:p" > /sys/bus/usb/drivers/hub/module/parameters/quirks" Signed-off-by: Hardik Gajjar Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20231027152029.104363-2-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/usb/core/hub.c | 15 +++++++++++++-- drivers/usb/core/quirks.c | 7 +++++++ include/linux/usb/quirks.h | 3 +++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index aebbe2981241..e6f0570cf490 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6603,6 +6603,9 @@ pause after every control message); o = USB_QUIRK_HUB_SLOW_RESET (Hub needs extra delay after resetting its port); + p = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT + (Reduce timeout of the SET_ADDRESS + request from 5000 ms to 500 ms); Example: quirks=0781:5580:bk,0a5c:5834:gij usbhid.mousepoll= diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a661f6ac1ad1..dea110241ee7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -60,6 +60,12 @@ #define USB_PING_RESPONSE_TIME 400 /* ns */ #define USB_REDUCE_FRAME_INTR_BINTERVAL 9 +/* + * The SET_ADDRESS request timeout will be 500 ms when + * USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT quirk flag is set. + */ +#define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */ @@ -4648,7 +4654,12 @@ EXPORT_SYMBOL_GPL(usb_ep0_reinit); static int hub_set_address(struct usb_device *udev, int devnum) { int retval; + unsigned int timeout_ms = USB_CTRL_SET_TIMEOUT; struct usb_hcd *hcd = bus_to_hcd(udev->bus); + struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); + + if (hub->hdev->quirks & USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT) + timeout_ms = USB_SHORT_SET_ADDRESS_REQ_TIMEOUT; /* * The host controller will choose the device address, @@ -4661,11 +4672,11 @@ static int hub_set_address(struct usb_device *udev, int devnum) if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) - retval = hcd->driver->address_device(hcd, udev, USB_CTRL_SET_TIMEOUT); + retval = hcd->driver->address_device(hcd, udev, timeout_ms); else retval = usb_control_msg(udev, usb_sndaddr0pipe(), USB_REQ_SET_ADDRESS, 0, devnum, 0, - NULL, 0, USB_CTRL_SET_TIMEOUT); + NULL, 0, timeout_ms); if (retval == 0) { update_devnum(udev, devnum); /* Device now using proper address. */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 15e9bd180a1d..b4783574b8e6 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -138,6 +138,9 @@ static int quirks_param_set(const char *value, const struct kernel_param *kp) case 'o': flags |= USB_QUIRK_HUB_SLOW_RESET; break; + case 'p': + flags |= USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT; + break; /* Ignore unrecognized flag characters */ } } @@ -527,6 +530,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2386, 0x350e), .driver_info = USB_QUIRK_NO_LPM }, + /* APTIV AUTOMOTIVE HUB */ + { USB_DEVICE(0x2c48, 0x0132), .driver_info = + USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT }, + /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index eeb7c2157c72..59409c1fc3de 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -72,4 +72,7 @@ /* device has endpoints that should be ignored */ #define USB_QUIRK_ENDPOINT_IGNORE BIT(15) +/* short SET_ADDRESS request timeout */ +#define USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT BIT(16) + #endif /* __LINUX_USB_QUIRKS_H */ From 349dbfd65f862baf06099e559419443c6fe09ecf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:55 -0700 Subject: [PATCH 371/553] clk: Remove prepare_lock hold assertion in __clk_release() [ Upstream commit 8358a76cfb47c9a5af627a0c4e7168aa14fa25f6 ] Removing this assertion lets us move the kref_put() call outside the prepare_lock section. We don't need to hold the prepare_lock here to free memory and destroy the clk_core structure. We've already unlinked the clk from the clk tree and by the time the release function runs nothing holds a reference to the clk_core anymore so anything with the pointer can't access the memory that's being freed anyway. Way back in commit 496eadf821c2 ("clk: Use lockdep asserts to find missing hold of prepare_lock") we didn't need to have this assertion either. Fixes: 496eadf821c2 ("clk: Use lockdep asserts to find missing hold of prepare_lock") Cc: Krzysztof Kozlowski Reviewed-by: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-2-sboyd@kernel.org Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 9004e0718225..ad40913d80a8 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4230,8 +4230,6 @@ static void __clk_release(struct kref *ref) { struct clk_core *core = container_of(ref, struct clk_core, ref); - lockdep_assert_held(&prepare_lock); - clk_core_free_parent_map(core); kfree_const(core->name); kfree(core); From 43bc4cfef2b18fad0141af7aff7703b5081aa4a5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 7 Mar 2023 14:29:28 +0100 Subject: [PATCH 372/553] clk: Print an info line before disabling unused clocks [ Upstream commit 12ca59b91d04df32e41be5a52f0cabba912c11de ] Currently, the regulator framework informs us before calling into their unused cleanup paths, which eases at least some debugging. The same could be beneficial for clocks, so that random shutdowns shortly after most initcalls are done can be less of a guess. Add a pr_info before disabling unused clocks to do so. Reviewed-by: Marijn Suijten Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230307132928.3887737-1-konrad.dybcio@linaro.org Signed-off-by: Stephen Boyd Stable-dep-of: e581cf5d2162 ("clk: Get runtime PM before walking tree during disable_unused") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ad40913d80a8..d841a9d7281c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1388,6 +1388,8 @@ static int __init clk_disable_unused(void) return 0; } + pr_info("clk: Disabling unused clocks\n"); + clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) From 5558b3b68c241998c1e601172c8b0ff02c6332c1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:57 -0700 Subject: [PATCH 373/553] clk: Initialize struct clk_core kref earlier [ Upstream commit 9d05ae531c2cff20d5d527f04e28d28e04379929 ] Initialize this kref once we allocate memory for the struct clk_core so that we can reuse the release function to free any memory associated with the structure. This mostly consolidates code, but also clarifies that the kref lifetime exists once the container structure (struct clk_core) is allocated instead of leaving it in a half-baked state for most of __clk_core_init(). Reviewed-by: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-4-sboyd@kernel.org Stable-dep-of: e581cf5d2162 ("clk: Get runtime PM before walking tree during disable_unused") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index d841a9d7281c..4f9f55cff923 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3838,8 +3838,6 @@ static int __clk_core_init(struct clk_core *core) } clk_core_reparent_orphans_nolock(); - - kref_init(&core->ref); out: clk_pm_runtime_put(core); unlock: @@ -4068,6 +4066,16 @@ static void clk_core_free_parent_map(struct clk_core *core) kfree(core->parents); } +/* Free memory allocated for a struct clk_core */ +static void __clk_release(struct kref *ref) +{ + struct clk_core *core = container_of(ref, struct clk_core, ref); + + clk_core_free_parent_map(core); + kfree_const(core->name); + kfree(core); +} + static struct clk * __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) { @@ -4088,6 +4096,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_out; } + kref_init(&core->ref); + core->name = kstrdup_const(init->name, GFP_KERNEL); if (!core->name) { ret = -ENOMEM; @@ -4142,12 +4152,10 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) hw->clk = NULL; fail_create_clk: - clk_core_free_parent_map(core); fail_parents: fail_ops: - kfree_const(core->name); fail_name: - kfree(core); + kref_put(&core->ref, __clk_release); fail_out: return ERR_PTR(ret); } @@ -4227,16 +4235,6 @@ int of_clk_hw_register(struct device_node *node, struct clk_hw *hw) } EXPORT_SYMBOL_GPL(of_clk_hw_register); -/* Free memory allocated for a clock. */ -static void __clk_release(struct kref *ref) -{ - struct clk_core *core = container_of(ref, struct clk_core, ref); - - clk_core_free_parent_map(core); - kfree_const(core->name); - kfree(core); -} - /* * Empty clk_ops for unregistered clocks. These are used temporarily * after clk_unregister() was called on a clock and until last clock From a424e713e0cc33d4b969cfda25b9f46df4d7b5bc Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:58 -0700 Subject: [PATCH 374/553] clk: Get runtime PM before walking tree during disable_unused [ Upstream commit e581cf5d216289ef292d1a4036d53ce90e122469 ] Doug reported [1] the following hung task: INFO: task swapper/0:1 blocked for more than 122 seconds. Not tainted 5.15.149-21875-gf795ebc40eb8 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:swapper/0 state:D stack: 0 pid: 1 ppid: 0 flags:0x00000008 Call trace: __switch_to+0xf4/0x1f4 __schedule+0x418/0xb80 schedule+0x5c/0x10c rpm_resume+0xe0/0x52c rpm_resume+0x178/0x52c __pm_runtime_resume+0x58/0x98 clk_pm_runtime_get+0x30/0xb0 clk_disable_unused_subtree+0x58/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused_subtree+0x38/0x208 clk_disable_unused+0x4c/0xe4 do_one_initcall+0xcc/0x2d8 do_initcall_level+0xa4/0x148 do_initcalls+0x5c/0x9c do_basic_setup+0x24/0x30 kernel_init_freeable+0xec/0x164 kernel_init+0x28/0x120 ret_from_fork+0x10/0x20 INFO: task kworker/u16:0:9 blocked for more than 122 seconds. Not tainted 5.15.149-21875-gf795ebc40eb8 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u16:0 state:D stack: 0 pid: 9 ppid: 2 flags:0x00000008 Workqueue: events_unbound deferred_probe_work_func Call trace: __switch_to+0xf4/0x1f4 __schedule+0x418/0xb80 schedule+0x5c/0x10c schedule_preempt_disabled+0x2c/0x48 __mutex_lock+0x238/0x488 __mutex_lock_slowpath+0x1c/0x28 mutex_lock+0x50/0x74 clk_prepare_lock+0x7c/0x9c clk_core_prepare_lock+0x20/0x44 clk_prepare+0x24/0x30 clk_bulk_prepare+0x40/0xb0 mdss_runtime_resume+0x54/0x1c8 pm_generic_runtime_resume+0x30/0x44 __genpd_runtime_resume+0x68/0x7c genpd_runtime_resume+0x108/0x1f4 __rpm_callback+0x84/0x144 rpm_callback+0x30/0x88 rpm_resume+0x1f4/0x52c rpm_resume+0x178/0x52c __pm_runtime_resume+0x58/0x98 __device_attach+0xe0/0x170 device_initial_probe+0x1c/0x28 bus_probe_device+0x3c/0x9c device_add+0x644/0x814 mipi_dsi_device_register_full+0xe4/0x170 devm_mipi_dsi_device_register_full+0x28/0x70 ti_sn_bridge_probe+0x1dc/0x2c0 auxiliary_bus_probe+0x4c/0x94 really_probe+0xcc/0x2c8 __driver_probe_device+0xa8/0x130 driver_probe_device+0x48/0x110 __device_attach_driver+0xa4/0xcc bus_for_each_drv+0x8c/0xd8 __device_attach+0xf8/0x170 device_initial_probe+0x1c/0x28 bus_probe_device+0x3c/0x9c deferred_probe_work_func+0x9c/0xd8 process_one_work+0x148/0x518 worker_thread+0x138/0x350 kthread+0x138/0x1e0 ret_from_fork+0x10/0x20 The first thread is walking the clk tree and calling clk_pm_runtime_get() to power on devices required to read the clk hardware via struct clk_ops::is_enabled(). This thread holds the clk prepare_lock, and is trying to runtime PM resume a device, when it finds that the device is in the process of resuming so the thread schedule()s away waiting for the device to finish resuming before continuing. The second thread is runtime PM resuming the same device, but the runtime resume callback is calling clk_prepare(), trying to grab the prepare_lock waiting on the first thread. This is a classic ABBA deadlock. To properly fix the deadlock, we must never runtime PM resume or suspend a device with the clk prepare_lock held. Actually doing that is near impossible today because the global prepare_lock would have to be dropped in the middle of the tree, the device runtime PM resumed/suspended, and then the prepare_lock grabbed again to ensure consistency of the clk tree topology. If anything changes with the clk tree in the meantime, we've lost and will need to start the operation all over again. Luckily, most of the time we're simply incrementing or decrementing the runtime PM count on an active device, so we don't have the chance to schedule away with the prepare_lock held. Let's fix this immediate problem that can be triggered more easily by simply booting on Qualcomm sc7180. Introduce a list of clk_core structures that have been registered, or are in the process of being registered, that require runtime PM to operate. Iterate this list and call clk_pm_runtime_get() on each of them without holding the prepare_lock during clk_disable_unused(). This way we can be certain that the runtime PM state of the devices will be active and resumed so we can't schedule away while walking the clk tree with the prepare_lock held. Similarly, call clk_pm_runtime_put() without the prepare_lock held to properly drop the runtime PM reference. We remove the calls to clk_pm_runtime_{get,put}() in this path because they're superfluous now that we know the devices are runtime resumed. Reported-by: Douglas Anderson Closes: https://lore.kernel.org/all/20220922084322.RFC.2.I375b6b9e0a0a5348962f004beb3dafee6a12dfbb@changeid/ [1] Closes: https://issuetracker.google.com/328070191 Cc: Marek Szyprowski Cc: Ulf Hansson Cc: Krzysztof Kozlowski Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-5-sboyd@kernel.org Reviewed-by: Douglas Anderson Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 117 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 12 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 4f9f55cff923..75d8f7f0de9b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -37,6 +37,10 @@ static HLIST_HEAD(clk_root_list); static HLIST_HEAD(clk_orphan_list); static LIST_HEAD(clk_notifier_list); +/* List of registered clks that use runtime PM */ +static HLIST_HEAD(clk_rpm_list); +static DEFINE_MUTEX(clk_rpm_list_lock); + static const struct hlist_head *all_lists[] = { &clk_root_list, &clk_orphan_list, @@ -59,6 +63,7 @@ struct clk_core { struct clk_hw *hw; struct module *owner; struct device *dev; + struct hlist_node rpm_node; struct device_node *of_node; struct clk_core *parent; struct clk_parent_map *parents; @@ -122,6 +127,89 @@ static void clk_pm_runtime_put(struct clk_core *core) pm_runtime_put_sync(core->dev); } +/** + * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices + * + * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so + * that disabling unused clks avoids a deadlock where a device is runtime PM + * resuming/suspending and the runtime PM callback is trying to grab the + * prepare_lock for something like clk_prepare_enable() while + * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime + * PM resume/suspend the device as well. + * + * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on + * success. Otherwise the lock is released on failure. + * + * Return: 0 on success, negative errno otherwise. + */ +static int clk_pm_runtime_get_all(void) +{ + int ret; + struct clk_core *core, *failed; + + /* + * Grab the list lock to prevent any new clks from being registered + * or unregistered until clk_pm_runtime_put_all(). + */ + mutex_lock(&clk_rpm_list_lock); + + /* + * Runtime PM "get" all the devices that are needed for the clks + * currently registered. Do this without holding the prepare_lock, to + * avoid the deadlock. + */ + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + ret = clk_pm_runtime_get(core); + if (ret) { + failed = core; + pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n", + dev_name(failed->dev), failed->name); + goto err; + } + } + + return 0; + +err: + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { + if (core == failed) + break; + + clk_pm_runtime_put(core); + } + mutex_unlock(&clk_rpm_list_lock); + + return ret; +} + +/** + * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices + * + * Put the runtime PM references taken in clk_pm_runtime_get_all() and release + * the 'clk_rpm_list_lock'. + */ +static void clk_pm_runtime_put_all(void) +{ + struct clk_core *core; + + hlist_for_each_entry(core, &clk_rpm_list, rpm_node) + clk_pm_runtime_put(core); + mutex_unlock(&clk_rpm_list_lock); +} + +static void clk_pm_runtime_init(struct clk_core *core) +{ + struct device *dev = core->dev; + + if (dev && pm_runtime_enabled(dev)) { + core->rpm_enabled = true; + + mutex_lock(&clk_rpm_list_lock); + hlist_add_head(&core->rpm_node, &clk_rpm_list); + mutex_unlock(&clk_rpm_list_lock); + } +} + /*** locking ***/ static void clk_prepare_lock(void) { @@ -1310,9 +1398,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) if (core->flags & CLK_IGNORE_UNUSED) return; - if (clk_pm_runtime_get(core)) - return; - if (clk_core_is_prepared(core)) { trace_clk_unprepare(core); if (core->ops->unprepare_unused) @@ -1321,8 +1406,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); } - - clk_pm_runtime_put(core); } static void __init clk_disable_unused_subtree(struct clk_core *core) @@ -1338,9 +1421,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_prepare_enable(core->parent); - if (clk_pm_runtime_get(core)) - goto unprepare_out; - flags = clk_enable_lock(); if (core->enable_count) @@ -1365,8 +1445,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core) unlock_out: clk_enable_unlock(flags); - clk_pm_runtime_put(core); -unprepare_out: if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_unprepare(core->parent); } @@ -1382,6 +1460,7 @@ __setup("clk_ignore_unused", clk_ignore_unused_setup); static int __init clk_disable_unused(void) { struct clk_core *core; + int ret; if (clk_ignore_unused) { pr_warn("clk: Not disabling unused clocks\n"); @@ -1390,6 +1469,13 @@ static int __init clk_disable_unused(void) pr_info("clk: Disabling unused clocks\n"); + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; + /* + * Grab the prepare lock to keep the clk topology stable while iterating + * over clks. + */ clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) @@ -1406,6 +1492,8 @@ static int __init clk_disable_unused(void) clk_prepare_unlock(); + clk_pm_runtime_put_all(); + return 0; } late_initcall_sync(clk_disable_unused); @@ -4071,6 +4159,12 @@ static void __clk_release(struct kref *ref) { struct clk_core *core = container_of(ref, struct clk_core, ref); + if (core->rpm_enabled) { + mutex_lock(&clk_rpm_list_lock); + hlist_del(&core->rpm_node); + mutex_unlock(&clk_rpm_list_lock); + } + clk_core_free_parent_map(core); kfree_const(core->name); kfree(core); @@ -4110,9 +4204,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) } core->ops = init->ops; - if (dev && pm_runtime_enabled(dev)) - core->rpm_enabled = true; core->dev = dev; + clk_pm_runtime_init(core); core->of_node = np; if (dev && dev->driver) core->owner = dev->driver->owner; From 5833b99cf8fda9475f3db16490e66aee9467df61 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Thu, 16 Mar 2023 15:58:26 +0800 Subject: [PATCH 375/553] clk: remove unnecessary (void*) conversions [ Upstream commit 5b1a1c1ab1f981b15bce778db863344f59bd1501 ] Pointer variables of void * type do not require type cast. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20230316075826.22754-1-yuzhe@nfschina.com Signed-off-by: Stephen Boyd Stable-dep-of: 9d1e795f754d ("clk: Get runtime PM before walking tree for clk_summary") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 75d8f7f0de9b..bf4ac2f52d33 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3245,7 +3245,7 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, static int clk_summary_show(struct seq_file *s, void *data) { struct clk_core *c; - struct hlist_head **lists = (struct hlist_head **)s->private; + struct hlist_head **lists = s->private; seq_puts(s, " enable prepare protect duty hardware\n"); seq_puts(s, " clock count count count rate accuracy phase cycle enable\n"); @@ -3304,7 +3304,7 @@ static int clk_dump_show(struct seq_file *s, void *data) { struct clk_core *c; bool first_node = true; - struct hlist_head **lists = (struct hlist_head **)s->private; + struct hlist_head **lists = s->private; seq_putc(s, '{'); clk_prepare_lock(); From 5a704c267a210e1ea097e1d80bfc6620a706051d Mon Sep 17 00:00:00 2001 From: Vishal Badole Date: Sun, 27 Nov 2022 22:53:19 +0530 Subject: [PATCH 376/553] clk: Show active consumers of clocks in debugfs [ Upstream commit dcce5cc7826e9c6b3a2443e5e6b7f8d02a103c35 ] This feature lists the clock consumer's name and respective connection id. Using this feature user can easily check that which user has acquired and enabled a particular clock. Usage: >> cat /sys/kernel/debug/clk/clk_summary enable prepare protect duty hardware Connection clock count count count rate accuracy phase cycle enable consumer Id ------------------------------------------------------------------------------------------------------------------------------ clk_mcasp0_fixed 0 0 0 24576000 0 0 50000 Y deviceless of_clk_get_from_provider deviceless no_connection_id clk_mcasp0 0 0 0 24576000 0 0 50000 N simple-audio-card,cpu no_connection_id deviceless no_connection_id Co-developed-by: Chinmoy Ghosh Signed-off-by: Chinmoy Ghosh Co-developed-by: Mintu Patel Signed-off-by: Mintu Patel Co-developed-by: Vimal Kumar Signed-off-by: Vimal Kumar Signed-off-by: Vishal Badole Link: https://lore.kernel.org/r/1669569799-8526-1-git-send-email-badolevishal1116@gmail.com Signed-off-by: Stephen Boyd Stable-dep-of: 9d1e795f754d ("clk: Get runtime PM before walking tree for clk_summary") Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index bf4ac2f52d33..ded4a51323d2 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3205,28 +3205,41 @@ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c, int level) { int phase; + struct clk *clk_user; + int multi_node = 0; - seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu ", + seq_printf(s, "%*s%-*s %-7d %-8d %-8d %-11lu %-10lu ", level * 3 + 1, "", - 30 - level * 3, c->name, + 35 - level * 3, c->name, c->enable_count, c->prepare_count, c->protect_count, clk_core_get_rate_recalc(c), clk_core_get_accuracy_recalc(c)); phase = clk_core_get_phase(c); if (phase >= 0) - seq_printf(s, "%5d", phase); + seq_printf(s, "%-5d", phase); else seq_puts(s, "-----"); - seq_printf(s, " %6d", clk_core_get_scaled_duty_cycle(c, 100000)); + seq_printf(s, " %-6d", clk_core_get_scaled_duty_cycle(c, 100000)); if (c->ops->is_enabled) - seq_printf(s, " %9c\n", clk_core_is_enabled(c) ? 'Y' : 'N'); + seq_printf(s, " %5c ", clk_core_is_enabled(c) ? 'Y' : 'N'); else if (!c->ops->enable) - seq_printf(s, " %9c\n", 'Y'); + seq_printf(s, " %5c ", 'Y'); else - seq_printf(s, " %9c\n", '?'); + seq_printf(s, " %5c ", '?'); + + hlist_for_each_entry(clk_user, &c->clks, clks_node) { + seq_printf(s, "%*s%-*s %-25s\n", + level * 3 + 2 + 105 * multi_node, "", + 30, + clk_user->dev_id ? clk_user->dev_id : "deviceless", + clk_user->con_id ? clk_user->con_id : "no_connection_id"); + + multi_node = 1; + } + } static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, @@ -3247,9 +3260,10 @@ static int clk_summary_show(struct seq_file *s, void *data) struct clk_core *c; struct hlist_head **lists = s->private; - seq_puts(s, " enable prepare protect duty hardware\n"); - seq_puts(s, " clock count count count rate accuracy phase cycle enable\n"); - seq_puts(s, "-------------------------------------------------------------------------------------------------------\n"); + seq_puts(s, " enable prepare protect duty hardware connection\n"); + seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); + seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); + clk_prepare_lock(); From 83ada89e4a86e2b28ea2b5113c76d6dc7560a4d0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2024 11:41:59 -0700 Subject: [PATCH 377/553] clk: Get runtime PM before walking tree for clk_summary [ Upstream commit 9d1e795f754db1ac3344528b7af0b17b8146f321 ] Similar to the previous commit, we should make sure that all devices are runtime resumed before printing the clk_summary through debugfs. Failure to do so would result in a deadlock if the thread is resuming a device to print clk state and that device is also runtime resuming in another thread, e.g the screen is turning on and the display driver is starting up. We remove the calls to clk_pm_runtime_{get,put}() in this path because they're superfluous now that we know the devices are runtime resumed. This also squashes a bug where the return value of clk_pm_runtime_get() wasn't checked, leading to an RPM count underflow on error paths. Fixes: 1bb294a7981c ("clk: Enable/Disable runtime PM for clk_summary") Cc: Taniya Das Cc: Douglas Anderson Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240325184204.745706-6-sboyd@kernel.org Reviewed-by: Douglas Anderson Signed-off-by: Sasha Levin --- drivers/clk/clk.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ded4a51323d2..fe1d45eac837 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3247,9 +3247,7 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, { struct clk_core *child; - clk_pm_runtime_get(c); clk_summary_show_one(s, c, level); - clk_pm_runtime_put(c); hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); @@ -3259,11 +3257,15 @@ static int clk_summary_show(struct seq_file *s, void *data) { struct clk_core *c; struct hlist_head **lists = s->private; + int ret; seq_puts(s, " enable prepare protect duty hardware connection\n"); seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; clk_prepare_lock(); @@ -3272,6 +3274,7 @@ static int clk_summary_show(struct seq_file *s, void *data) clk_summary_show_subtree(s, c, 0); clk_prepare_unlock(); + clk_pm_runtime_put_all(); return 0; } @@ -3319,8 +3322,14 @@ static int clk_dump_show(struct seq_file *s, void *data) struct clk_core *c; bool first_node = true; struct hlist_head **lists = s->private; + int ret; + + ret = clk_pm_runtime_get_all(); + if (ret) + return ret; seq_putc(s, '{'); + clk_prepare_lock(); for (; *lists; lists++) { @@ -3333,6 +3342,7 @@ static int clk_dump_show(struct seq_file *s, void *data) } clk_prepare_unlock(); + clk_pm_runtime_put_all(); seq_puts(s, "}\n"); return 0; From 0904f9ef910a4a34d9380e36edc4c73348882464 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:31 +0100 Subject: [PATCH 378/553] clk: mediatek: mt8192: Correctly unregister and free clocks on failure [ Upstream commit 0cbe12694990501be92f997d987925132002dbe5 ] If anything fails during probe of the clock controller(s), unregister (and kfree!) whatever we have previously registered to leave with a clean state and prevent leaks. Fixes: 710573dee31b ("clk: mediatek: Add MT8192 basic clocks support") Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Reviewed-by: Markus Schneider-Pargmann Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-2-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt8192.c | 75 ++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index d0f226931070..74bd8bac94a3 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1100,27 +1100,64 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data); - mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); - mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); - mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, &mt8192_clk_lock, - top_clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8192_clk_lock, - top_clk_data); - mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8192_clk_lock, - top_clk_data); - r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + r = mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data); if (r) return r; + r = mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); + if (r) + goto unregister_fixed_clks; + + r = mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); + if (r) + goto unregister_early_factors; + + r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + &mt8192_clk_lock, top_clk_data); + if (r) + goto unregister_factors; + + r = mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + &mt8192_clk_lock, top_clk_data); + if (r) + goto unregister_muxes; + + r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + &mt8192_clk_lock, top_clk_data); + if (r) + goto unregister_top_composites; + + r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + if (r) + goto unregister_adj_divs_composites; + r = clk_mt8192_reg_mfg_mux_notifier(&pdev->dev, top_clk_data->hws[CLK_TOP_MFG_PLL_SEL]->clk); if (r) - return r; + goto unregister_gates; + r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, top_clk_data); + if (r) + goto unregister_gates; - return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, + return 0; + +unregister_gates: + mtk_clk_unregister_gates(top_clks, ARRAY_SIZE(top_clks), top_clk_data); +unregister_adj_divs_composites: + mtk_clk_unregister_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), top_clk_data); +unregister_top_composites: + mtk_clk_unregister_composites(top_muxes, ARRAY_SIZE(top_muxes), top_clk_data); +unregister_muxes: + mtk_clk_unregister_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), top_clk_data); +unregister_factors: + mtk_clk_unregister_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); +unregister_early_factors: + mtk_clk_unregister_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); +unregister_fixed_clks: + mtk_clk_unregister_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data); + return r; } static int clk_mt8192_infra_probe(struct platform_device *pdev) @@ -1139,14 +1176,16 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) r = mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc); if (r) - goto free_clk_data; + goto unregister_gates; r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) - goto free_clk_data; + goto unregister_gates; return r; +unregister_gates: + mtk_clk_unregister_gates(infra_clks, ARRAY_SIZE(infra_clks), clk_data); free_clk_data: mtk_free_clk_data(clk_data); return r; @@ -1168,10 +1207,12 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) - goto free_clk_data; + goto unregister_gates; return r; +unregister_gates: + mtk_clk_unregister_gates(peri_clks, ARRAY_SIZE(peri_clks), clk_data); free_clk_data: mtk_free_clk_data(clk_data); return r; @@ -1194,10 +1235,12 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) - goto free_clk_data; + goto unregister_gates; return r; +unregister_gates: + mtk_clk_unregister_gates(apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); free_clk_data: mtk_free_clk_data(clk_data); return r; From 647a25b07d6d0e190b19b2ce3e674435f1d369b8 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:32 +0100 Subject: [PATCH 379/553] clk: mediatek: mt8192: Propagate struct device for gate clocks [ Upstream commit fdc325c8f79cb4155009db8394db19793c4d07cd ] Convert instances of mtk_clk_register_gates() to use the newer mtk_clk_register_gates_with_dev() to propagate struct device to the clk framework. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-3-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt8192.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index 74bd8bac94a3..508af9bbcc46 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1127,7 +1127,8 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_top_composites; - r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + r = mtk_clk_register_gates_with_dev(node, top_clks, ARRAY_SIZE(top_clks), + top_clk_data, &pdev->dev); if (r) goto unregister_adj_divs_composites; @@ -1170,7 +1171,8 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), clk_data); + r = mtk_clk_register_gates_with_dev(node, infra_clks, ARRAY_SIZE(infra_clks), + clk_data, &pdev->dev); if (r) goto free_clk_data; @@ -1201,7 +1203,8 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); + r = mtk_clk_register_gates_with_dev(node, peri_clks, ARRAY_SIZE(peri_clks), + clk_data, &pdev->dev); if (r) goto free_clk_data; @@ -1229,7 +1232,9 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) return -ENOMEM; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - r = mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); + r = mtk_clk_register_gates_with_dev(node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data, + &pdev->dev); if (r) goto free_clk_data; From 082b831488a41257b7ac7ffa1d80a0b60d98394d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:33 +0100 Subject: [PATCH 380/553] clk: mediatek: clk-gate: Propagate struct device with mtk_clk_register_gates() [ Upstream commit 20498d52c9c1a68b1d92c42bce1dc893d3e74f30 ] Commit e4c23e19aa2a ("clk: mediatek: Register clock gate with device") introduces a helper function for the sole purpose of propagating a struct device pointer to the clk API when registering the mtk-gate clocks to take advantage of Runtime PM when/where needed and where a power domain is defined in devicetree. Function mtk_clk_register_gates() then becomes a wrapper around the new mtk_clk_register_gates_with_dev() function that will simply pass NULL as struct device: this is essential when registering drivers with CLK_OF_DECLARE instead of as a platform device, as there will be no struct device to pass... but we can as well simply have only one function that always takes such pointer as a param and pass NULL when unavoidable. This commit removes the mtk_clk_register_gates() wrapper and renames mtk_clk_register_gates_with_dev() to the former and all of the calls to either of the two functions were fixed in all drivers in order to reflect this change; also, to improve consistency with other kernel functions, the pointer to struct device was moved as the first param. Since a lot of MediaTek clock drivers are actually registering as a platform device, but were still registering the mtk-gate clocks without passing any struct device to the clock framework, they've been changed to pass a valid one now, as to make all those platforms able to use runtime power management where available. While at it, some much needed indentation changes were also done. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Reviewed-by: Markus Schneider-Pargmann Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-4-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-gate.c | 23 ++++++------------- drivers/clk/mediatek/clk-gate.h | 7 +----- drivers/clk/mediatek/clk-mt2701-aud.c | 4 ++-- drivers/clk/mediatek/clk-mt2701-eth.c | 4 ++-- drivers/clk/mediatek/clk-mt2701-g3d.c | 2 +- drivers/clk/mediatek/clk-mt2701-hif.c | 4 ++-- drivers/clk/mediatek/clk-mt2701-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt2701.c | 12 +++++----- drivers/clk/mediatek/clk-mt2712-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt2712.c | 12 +++++----- drivers/clk/mediatek/clk-mt6765.c | 10 ++++---- drivers/clk/mediatek/clk-mt6779-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt6779.c | 6 ++--- drivers/clk/mediatek/clk-mt6795-infracfg.c | 3 ++- drivers/clk/mediatek/clk-mt6795-mm.c | 3 ++- drivers/clk/mediatek/clk-mt6795-pericfg.c | 3 ++- drivers/clk/mediatek/clk-mt6797-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt6797.c | 4 ++-- drivers/clk/mediatek/clk-mt7622-aud.c | 4 ++-- drivers/clk/mediatek/clk-mt7622-eth.c | 8 +++---- drivers/clk/mediatek/clk-mt7622-hif.c | 8 +++---- drivers/clk/mediatek/clk-mt7622.c | 14 ++++++------ drivers/clk/mediatek/clk-mt7629-eth.c | 7 +++--- drivers/clk/mediatek/clk-mt7629-hif.c | 8 +++---- drivers/clk/mediatek/clk-mt7629.c | 10 ++++---- drivers/clk/mediatek/clk-mt7986-eth.c | 10 ++++---- drivers/clk/mediatek/clk-mt7986-infracfg.c | 4 ++-- drivers/clk/mediatek/clk-mt8135.c | 8 +++---- drivers/clk/mediatek/clk-mt8167-aud.c | 2 +- drivers/clk/mediatek/clk-mt8167-img.c | 2 +- drivers/clk/mediatek/clk-mt8167-mfgcfg.c | 2 +- drivers/clk/mediatek/clk-mt8167-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt8167-vdec.c | 3 ++- drivers/clk/mediatek/clk-mt8167.c | 2 +- drivers/clk/mediatek/clk-mt8173-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt8173.c | 24 ++++++++++---------- drivers/clk/mediatek/clk-mt8183-audio.c | 4 ++-- drivers/clk/mediatek/clk-mt8183-mm.c | 4 ++-- drivers/clk/mediatek/clk-mt8183.c | 16 ++++++------- drivers/clk/mediatek/clk-mt8186-mm.c | 3 ++- drivers/clk/mediatek/clk-mt8192-aud.c | 3 ++- drivers/clk/mediatek/clk-mt8192-mm.c | 3 ++- drivers/clk/mediatek/clk-mt8192.c | 17 +++++++------- drivers/clk/mediatek/clk-mt8195-apmixedsys.c | 3 ++- drivers/clk/mediatek/clk-mt8195-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8195-vdo0.c | 3 ++- drivers/clk/mediatek/clk-mt8195-vdo1.c | 3 ++- drivers/clk/mediatek/clk-mt8365-mm.c | 5 ++-- drivers/clk/mediatek/clk-mt8365.c | 4 ++-- drivers/clk/mediatek/clk-mt8516-aud.c | 2 +- drivers/clk/mediatek/clk-mt8516.c | 2 +- drivers/clk/mediatek/clk-mtk.c | 4 ++-- 52 files changed, 156 insertions(+), 160 deletions(-) diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c index 0c867136e49d..67d9e741c5e7 100644 --- a/drivers/clk/mediatek/clk-gate.c +++ b/drivers/clk/mediatek/clk-gate.c @@ -152,12 +152,12 @@ const struct clk_ops mtk_clk_gate_ops_no_setclr_inv = { }; EXPORT_SYMBOL_GPL(mtk_clk_gate_ops_no_setclr_inv); -static struct clk_hw *mtk_clk_register_gate(const char *name, +static struct clk_hw *mtk_clk_register_gate(struct device *dev, const char *name, const char *parent_name, struct regmap *regmap, int set_ofs, int clr_ofs, int sta_ofs, u8 bit, const struct clk_ops *ops, - unsigned long flags, struct device *dev) + unsigned long flags) { struct mtk_clk_gate *cg; int ret; @@ -202,10 +202,9 @@ static void mtk_clk_unregister_gate(struct clk_hw *hw) kfree(cg); } -int mtk_clk_register_gates_with_dev(struct device_node *node, - const struct mtk_gate *clks, int num, - struct clk_hw_onecell_data *clk_data, - struct device *dev) +int mtk_clk_register_gates(struct device *dev, struct device_node *node, + const struct mtk_gate *clks, int num, + struct clk_hw_onecell_data *clk_data) { int i; struct clk_hw *hw; @@ -229,13 +228,13 @@ int mtk_clk_register_gates_with_dev(struct device_node *node, continue; } - hw = mtk_clk_register_gate(gate->name, gate->parent_name, + hw = mtk_clk_register_gate(dev, gate->name, gate->parent_name, regmap, gate->regs->set_ofs, gate->regs->clr_ofs, gate->regs->sta_ofs, gate->shift, gate->ops, - gate->flags, dev); + gate->flags); if (IS_ERR(hw)) { pr_err("Failed to register clk %s: %pe\n", gate->name, @@ -261,14 +260,6 @@ err: return PTR_ERR(hw); } -EXPORT_SYMBOL_GPL(mtk_clk_register_gates_with_dev); - -int mtk_clk_register_gates(struct device_node *node, - const struct mtk_gate *clks, int num, - struct clk_hw_onecell_data *clk_data) -{ - return mtk_clk_register_gates_with_dev(node, clks, num, clk_data, NULL); -} EXPORT_SYMBOL_GPL(mtk_clk_register_gates); void mtk_clk_unregister_gates(const struct mtk_gate *clks, int num, diff --git a/drivers/clk/mediatek/clk-gate.h b/drivers/clk/mediatek/clk-gate.h index d9897ef53528..1a46b4c56fc5 100644 --- a/drivers/clk/mediatek/clk-gate.h +++ b/drivers/clk/mediatek/clk-gate.h @@ -50,15 +50,10 @@ struct mtk_gate { #define GATE_MTK(_id, _name, _parent, _regs, _shift, _ops) \ GATE_MTK_FLAGS(_id, _name, _parent, _regs, _shift, _ops, 0) -int mtk_clk_register_gates(struct device_node *node, +int mtk_clk_register_gates(struct device *dev, struct device_node *node, const struct mtk_gate *clks, int num, struct clk_hw_onecell_data *clk_data); -int mtk_clk_register_gates_with_dev(struct device_node *node, - const struct mtk_gate *clks, int num, - struct clk_hw_onecell_data *clk_data, - struct device *dev); - void mtk_clk_unregister_gates(const struct mtk_gate *clks, int num, struct clk_hw_onecell_data *clk_data); diff --git a/drivers/clk/mediatek/clk-mt2701-aud.c b/drivers/clk/mediatek/clk-mt2701-aud.c index 4287bd3f545e..03ab212aa7f4 100644 --- a/drivers/clk/mediatek/clk-mt2701-aud.c +++ b/drivers/clk/mediatek/clk-mt2701-aud.c @@ -127,8 +127,8 @@ static int clk_mt2701_aud_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_AUD_NR); - mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, audio_clks, + ARRAY_SIZE(audio_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt2701-eth.c b/drivers/clk/mediatek/clk-mt2701-eth.c index 601358748750..924725d67c13 100644 --- a/drivers/clk/mediatek/clk-mt2701-eth.c +++ b/drivers/clk/mediatek/clk-mt2701-eth.c @@ -51,8 +51,8 @@ static int clk_mt2701_eth_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_ETHSYS_NR); - mtk_clk_register_gates(node, eth_clks, ARRAY_SIZE(eth_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, eth_clks, + ARRAY_SIZE(eth_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt2701-g3d.c b/drivers/clk/mediatek/clk-mt2701-g3d.c index 8d1fc8e3336e..501fb99bb41a 100644 --- a/drivers/clk/mediatek/clk-mt2701-g3d.c +++ b/drivers/clk/mediatek/clk-mt2701-g3d.c @@ -45,7 +45,7 @@ static int clk_mt2701_g3dsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_G3DSYS_NR); - mtk_clk_register_gates(node, g3d_clks, ARRAY_SIZE(g3d_clks), + mtk_clk_register_gates(&pdev->dev, node, g3d_clks, ARRAY_SIZE(g3d_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt2701-hif.c b/drivers/clk/mediatek/clk-mt2701-hif.c index edeeb033a235..1ddefc21d6a0 100644 --- a/drivers/clk/mediatek/clk-mt2701-hif.c +++ b/drivers/clk/mediatek/clk-mt2701-hif.c @@ -48,8 +48,8 @@ static int clk_mt2701_hif_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_HIFSYS_NR); - mtk_clk_register_gates(node, hif_clks, ARRAY_SIZE(hif_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, hif_clks, + ARRAY_SIZE(hif_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt2701-mm.c b/drivers/clk/mediatek/clk-mt2701-mm.c index eb069f3bc9a2..f4885dffb324 100644 --- a/drivers/clk/mediatek/clk-mt2701-mm.c +++ b/drivers/clk/mediatek/clk-mt2701-mm.c @@ -76,8 +76,8 @@ static int clk_mt2701_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt2701.c b/drivers/clk/mediatek/clk-mt2701.c index 00d2e81bdd43..c7510f7ba4cc 100644 --- a/drivers/clk/mediatek/clk-mt2701.c +++ b/drivers/clk/mediatek/clk-mt2701.c @@ -685,8 +685,8 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2701_clk_lock, clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } @@ -789,8 +789,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) } } - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - infra_clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), infra_clk_data); mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs), infra_clk_data); @@ -902,8 +902,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); mtk_clk_register_composites(peri_muxs, ARRAY_SIZE(peri_muxs), base, &mt2701_clk_lock, clk_data); diff --git a/drivers/clk/mediatek/clk-mt2712-mm.c b/drivers/clk/mediatek/clk-mt2712-mm.c index ad6daa8f28a8..e5264f1ce60d 100644 --- a/drivers/clk/mediatek/clk-mt2712-mm.c +++ b/drivers/clk/mediatek/clk-mt2712-mm.c @@ -117,8 +117,8 @@ static int clk_mt2712_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt2712.c b/drivers/clk/mediatek/clk-mt2712.c index d6c2cc183b1a..78ebb4f2335c 100644 --- a/drivers/clk/mediatek/clk-mt2712.c +++ b/drivers/clk/mediatek/clk-mt2712.c @@ -1324,8 +1324,8 @@ static int clk_mt2712_top_probe(struct platform_device *pdev) &mt2712_clk_lock, top_clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2712_clk_lock, top_clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - top_clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, top_clk_data); @@ -1344,8 +1344,8 @@ static int clk_mt2712_infra_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -1366,8 +1366,8 @@ static int clk_mt2712_peri_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt6765.c b/drivers/clk/mediatek/clk-mt6765.c index 2c6a52ff5564..4a7bc6e04580 100644 --- a/drivers/clk/mediatek/clk-mt6765.c +++ b/drivers/clk/mediatek/clk-mt6765.c @@ -743,7 +743,7 @@ static int clk_mt6765_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -784,8 +784,8 @@ static int clk_mt6765_top_probe(struct platform_device *pdev) clk_data); mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, &mt6765_clk_lock, clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -820,8 +820,8 @@ static int clk_mt6765_ifr_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, ifr_clks, ARRAY_SIZE(ifr_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, ifr_clks, + ARRAY_SIZE(ifr_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt6779-mm.c b/drivers/clk/mediatek/clk-mt6779-mm.c index eda8cbee3d23..2cccf62d3b36 100644 --- a/drivers/clk/mediatek/clk-mt6779-mm.c +++ b/drivers/clk/mediatek/clk-mt6779-mm.c @@ -93,8 +93,8 @@ static int clk_mt6779_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c index 39dadc954708..5a396d2464ce 100644 --- a/drivers/clk/mediatek/clk-mt6779.c +++ b/drivers/clk/mediatek/clk-mt6779.c @@ -1223,7 +1223,7 @@ static int clk_mt6779_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -1267,8 +1267,8 @@ static int clk_mt6779_infra_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt6795-infracfg.c b/drivers/clk/mediatek/clk-mt6795-infracfg.c index df7eed6e071e..8025d171d692 100644 --- a/drivers/clk/mediatek/clk-mt6795-infracfg.c +++ b/drivers/clk/mediatek/clk-mt6795-infracfg.c @@ -101,7 +101,8 @@ static int clk_mt6795_infracfg_probe(struct platform_device *pdev) if (ret) goto free_clk_data; - ret = mtk_clk_register_gates(node, infra_gates, ARRAY_SIZE(infra_gates), clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, infra_gates, + ARRAY_SIZE(infra_gates), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt6795-mm.c b/drivers/clk/mediatek/clk-mt6795-mm.c index fd73f202f292..eebb6143ada2 100644 --- a/drivers/clk/mediatek/clk-mt6795-mm.c +++ b/drivers/clk/mediatek/clk-mt6795-mm.c @@ -87,7 +87,8 @@ static int clk_mt6795_mm_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - ret = mtk_clk_register_gates(node, mm_gates, ARRAY_SIZE(mm_gates), clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, mm_gates, + ARRAY_SIZE(mm_gates), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt6795-pericfg.c b/drivers/clk/mediatek/clk-mt6795-pericfg.c index cb28d35dad59..f69e715e0c1f 100644 --- a/drivers/clk/mediatek/clk-mt6795-pericfg.c +++ b/drivers/clk/mediatek/clk-mt6795-pericfg.c @@ -109,7 +109,8 @@ static int clk_mt6795_pericfg_probe(struct platform_device *pdev) if (ret) goto free_clk_data; - ret = mtk_clk_register_gates(node, peri_gates, ARRAY_SIZE(peri_gates), clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, peri_gates, + ARRAY_SIZE(peri_gates), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt6797-mm.c b/drivers/clk/mediatek/clk-mt6797-mm.c index 99a63f46642f..d5e9fe445e30 100644 --- a/drivers/clk/mediatek/clk-mt6797-mm.c +++ b/drivers/clk/mediatek/clk-mt6797-mm.c @@ -89,8 +89,8 @@ static int clk_mt6797_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c index b362e99c8f53..29211744b173 100644 --- a/drivers/clk/mediatek/clk-mt6797.c +++ b/drivers/clk/mediatek/clk-mt6797.c @@ -584,8 +584,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) } } - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - infra_clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), infra_clk_data); mtk_clk_register_factors(infra_fixed_divs, ARRAY_SIZE(infra_fixed_divs), infra_clk_data); diff --git a/drivers/clk/mediatek/clk-mt7622-aud.c b/drivers/clk/mediatek/clk-mt7622-aud.c index b17731fa1144..e9070d0bea8d 100644 --- a/drivers/clk/mediatek/clk-mt7622-aud.c +++ b/drivers/clk/mediatek/clk-mt7622-aud.c @@ -114,8 +114,8 @@ static int clk_mt7622_audiosys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_AUDIO_NR_CLK); - mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, audio_clks, + ARRAY_SIZE(audio_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt7622-eth.c b/drivers/clk/mediatek/clk-mt7622-eth.c index a60190e83418..ece0f7a7c5f6 100644 --- a/drivers/clk/mediatek/clk-mt7622-eth.c +++ b/drivers/clk/mediatek/clk-mt7622-eth.c @@ -69,8 +69,8 @@ static int clk_mt7622_ethsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_ETH_NR_CLK); - mtk_clk_register_gates(node, eth_clks, ARRAY_SIZE(eth_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, eth_clks, + ARRAY_SIZE(eth_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -91,8 +91,8 @@ static int clk_mt7622_sgmiisys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_SGMII_NR_CLK); - mtk_clk_register_gates(node, sgmii_clks, ARRAY_SIZE(sgmii_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, sgmii_clks, + ARRAY_SIZE(sgmii_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7622-hif.c b/drivers/clk/mediatek/clk-mt7622-hif.c index 55baa6d06a20..c57ac2273c4e 100644 --- a/drivers/clk/mediatek/clk-mt7622-hif.c +++ b/drivers/clk/mediatek/clk-mt7622-hif.c @@ -80,8 +80,8 @@ static int clk_mt7622_ssusbsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_SSUSB_NR_CLK); - mtk_clk_register_gates(node, ssusb_clks, ARRAY_SIZE(ssusb_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, ssusb_clks, + ARRAY_SIZE(ssusb_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -102,8 +102,8 @@ static int clk_mt7622_pciesys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PCIE_NR_CLK); - mtk_clk_register_gates(node, pcie_clks, ARRAY_SIZE(pcie_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, pcie_clks, + ARRAY_SIZE(pcie_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7622.c b/drivers/clk/mediatek/clk-mt7622.c index eebbb8790693..bba88018f056 100644 --- a/drivers/clk/mediatek/clk-mt7622.c +++ b/drivers/clk/mediatek/clk-mt7622.c @@ -621,8 +621,8 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt7622_clk_lock, clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } @@ -635,8 +635,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); mtk_clk_register_cpumuxes(node, infra_muxes, ARRAY_SIZE(infra_muxes), clk_data); @@ -663,7 +663,7 @@ static int mtk_apmixedsys_init(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -682,8 +682,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, &mt7622_clk_lock, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7629-eth.c b/drivers/clk/mediatek/clk-mt7629-eth.c index e1d2635c72c1..eab838af6d41 100644 --- a/drivers/clk/mediatek/clk-mt7629-eth.c +++ b/drivers/clk/mediatek/clk-mt7629-eth.c @@ -82,7 +82,8 @@ static int clk_mt7629_ethsys_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, eth_clks, CLK_ETH_NR_CLK, clk_data); + mtk_clk_register_gates(&pdev->dev, node, eth_clks, + CLK_ETH_NR_CLK, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -106,8 +107,8 @@ static int clk_mt7629_sgmiisys_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, sgmii_clks[id++], CLK_SGMII_NR_CLK, - clk_data); + mtk_clk_register_gates(&pdev->dev, node, sgmii_clks[id++], + CLK_SGMII_NR_CLK, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7629-hif.c b/drivers/clk/mediatek/clk-mt7629-hif.c index 3628811a2f57..804900792e49 100644 --- a/drivers/clk/mediatek/clk-mt7629-hif.c +++ b/drivers/clk/mediatek/clk-mt7629-hif.c @@ -75,8 +75,8 @@ static int clk_mt7629_ssusbsys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_SSUSB_NR_CLK); - mtk_clk_register_gates(node, ssusb_clks, ARRAY_SIZE(ssusb_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, ssusb_clks, + ARRAY_SIZE(ssusb_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -97,8 +97,8 @@ static int clk_mt7629_pciesys_init(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PCIE_NR_CLK); - mtk_clk_register_gates(node, pcie_clks, ARRAY_SIZE(pcie_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, pcie_clks, + ARRAY_SIZE(pcie_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt7629.c b/drivers/clk/mediatek/clk-mt7629.c index 01ee45fcd7e3..c0cdaf024296 100644 --- a/drivers/clk/mediatek/clk-mt7629.c +++ b/drivers/clk/mediatek/clk-mt7629.c @@ -585,8 +585,8 @@ static int mtk_infrasys_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); mtk_clk_register_cpumuxes(node, infra_muxes, ARRAY_SIZE(infra_muxes), clk_data); @@ -610,8 +610,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, &mt7629_clk_lock, clk_data); @@ -637,7 +637,7 @@ static int mtk_apmixedsys_init(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); clk_prepare_enable(clk_data->hws[CLK_APMIXED_ARMPLL]->clk); diff --git a/drivers/clk/mediatek/clk-mt7986-eth.c b/drivers/clk/mediatek/clk-mt7986-eth.c index c21e1d672384..e04bc6845ea6 100644 --- a/drivers/clk/mediatek/clk-mt7986-eth.c +++ b/drivers/clk/mediatek/clk-mt7986-eth.c @@ -72,8 +72,8 @@ static void __init mtk_sgmiisys_0_init(struct device_node *node) clk_data = mtk_alloc_clk_data(ARRAY_SIZE(sgmii0_clks)); - mtk_clk_register_gates(node, sgmii0_clks, ARRAY_SIZE(sgmii0_clks), - clk_data); + mtk_clk_register_gates(NULL, node, sgmii0_clks, + ARRAY_SIZE(sgmii0_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -90,8 +90,8 @@ static void __init mtk_sgmiisys_1_init(struct device_node *node) clk_data = mtk_alloc_clk_data(ARRAY_SIZE(sgmii1_clks)); - mtk_clk_register_gates(node, sgmii1_clks, ARRAY_SIZE(sgmii1_clks), - clk_data); + mtk_clk_register_gates(NULL, node, sgmii1_clks, + ARRAY_SIZE(sgmii1_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -109,7 +109,7 @@ static void __init mtk_ethsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(ARRAY_SIZE(eth_clks)); - mtk_clk_register_gates(node, eth_clks, ARRAY_SIZE(eth_clks), clk_data); + mtk_clk_register_gates(NULL, node, eth_clks, ARRAY_SIZE(eth_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7986-infracfg.c b/drivers/clk/mediatek/clk-mt7986-infracfg.c index 74e68a719730..578f150e0ee5 100644 --- a/drivers/clk/mediatek/clk-mt7986-infracfg.c +++ b/drivers/clk/mediatek/clk-mt7986-infracfg.c @@ -180,8 +180,8 @@ static int clk_mt7986_infracfg_probe(struct platform_device *pdev) mtk_clk_register_factors(infra_divs, ARRAY_SIZE(infra_divs), clk_data); mtk_clk_register_muxes(infra_muxes, ARRAY_SIZE(infra_muxes), node, &mt7986_clk_lock, clk_data); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { diff --git a/drivers/clk/mediatek/clk-mt8135.c b/drivers/clk/mediatek/clk-mt8135.c index 3ea06d2ec2f1..8137cf225272 100644 --- a/drivers/clk/mediatek/clk-mt8135.c +++ b/drivers/clk/mediatek/clk-mt8135.c @@ -553,8 +553,8 @@ static void __init mtk_infrasys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(NULL, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -579,8 +579,8 @@ static void __init mtk_pericfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_gates, ARRAY_SIZE(peri_gates), - clk_data); + mtk_clk_register_gates(NULL, node, peri_gates, + ARRAY_SIZE(peri_gates), clk_data); mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, &mt8135_clk_lock, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167-aud.c b/drivers/clk/mediatek/clk-mt8167-aud.c index b5ac196cd945..47a7d89d5777 100644 --- a/drivers/clk/mediatek/clk-mt8167-aud.c +++ b/drivers/clk/mediatek/clk-mt8167-aud.c @@ -50,7 +50,7 @@ static void __init mtk_audsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_AUD_NR_CLK); - mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); + mtk_clk_register_gates(NULL, node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8167-img.c b/drivers/clk/mediatek/clk-mt8167-img.c index 4e7c0772b4f9..e196b3b894a1 100644 --- a/drivers/clk/mediatek/clk-mt8167-img.c +++ b/drivers/clk/mediatek/clk-mt8167-img.c @@ -42,7 +42,7 @@ static void __init mtk_imgsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IMG_NR_CLK); - mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks), clk_data); + mtk_clk_register_gates(NULL, node, img_clks, ARRAY_SIZE(img_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167-mfgcfg.c b/drivers/clk/mediatek/clk-mt8167-mfgcfg.c index 192714498b2e..602d25f4cb2e 100644 --- a/drivers/clk/mediatek/clk-mt8167-mfgcfg.c +++ b/drivers/clk/mediatek/clk-mt8167-mfgcfg.c @@ -40,7 +40,7 @@ static void __init mtk_mfgcfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_MFG_NR_CLK); - mtk_clk_register_gates(node, mfg_clks, ARRAY_SIZE(mfg_clks), clk_data); + mtk_clk_register_gates(NULL, node, mfg_clks, ARRAY_SIZE(mfg_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167-mm.c b/drivers/clk/mediatek/clk-mt8167-mm.c index a94961b7b8cc..abc70e1221bf 100644 --- a/drivers/clk/mediatek/clk-mt8167-mm.c +++ b/drivers/clk/mediatek/clk-mt8167-mm.c @@ -98,8 +98,8 @@ static int clk_mt8167_mm_probe(struct platform_device *pdev) data = &mt8167_mmsys_driver_data; - ret = mtk_clk_register_gates(node, data->gates_clk, data->gates_num, - clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, data->gates_clk, + data->gates_num, clk_data); if (ret) return ret; diff --git a/drivers/clk/mediatek/clk-mt8167-vdec.c b/drivers/clk/mediatek/clk-mt8167-vdec.c index 38f0ba357d59..92bc05d99798 100644 --- a/drivers/clk/mediatek/clk-mt8167-vdec.c +++ b/drivers/clk/mediatek/clk-mt8167-vdec.c @@ -49,7 +49,8 @@ static void __init mtk_vdecsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VDEC_NR_CLK); - mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks), clk_data); + mtk_clk_register_gates(NULL, node, vdec_clks, ARRAY_SIZE(vdec_clks), + clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8167.c b/drivers/clk/mediatek/clk-mt8167.c index f900ac4bf7b8..59fe82ba5c7a 100644 --- a/drivers/clk/mediatek/clk-mt8167.c +++ b/drivers/clk/mediatek/clk-mt8167.c @@ -937,7 +937,7 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_fixed_clks(fixed_clks, ARRAY_SIZE(fixed_clks), clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), clk_data); + mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, diff --git a/drivers/clk/mediatek/clk-mt8173-mm.c b/drivers/clk/mediatek/clk-mt8173-mm.c index 5826eabdc9c7..444a3d58c8bf 100644 --- a/drivers/clk/mediatek/clk-mt8173-mm.c +++ b/drivers/clk/mediatek/clk-mt8173-mm.c @@ -112,8 +112,8 @@ static int clk_mt8173_mm_probe(struct platform_device *pdev) data = &mt8173_mmsys_driver_data; - ret = mtk_clk_register_gates(node, data->gates_clk, data->gates_num, - clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, data->gates_clk, + data->gates_num, clk_data); if (ret) return ret; diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index b8529ee7199d..74ed7dd129f4 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -888,8 +888,8 @@ static void __init mtk_infrasys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(NULL, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); mtk_clk_register_factors(infra_divs, ARRAY_SIZE(infra_divs), clk_data); mtk_clk_register_cpumuxes(node, cpu_muxes, ARRAY_SIZE(cpu_muxes), @@ -918,8 +918,8 @@ static void __init mtk_pericfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_gates, ARRAY_SIZE(peri_gates), - clk_data); + mtk_clk_register_gates(NULL, node, peri_gates, + ARRAY_SIZE(peri_gates), clk_data); mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, &mt8173_clk_lock, clk_data); @@ -1062,8 +1062,8 @@ static void __init mtk_imgsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IMG_NR_CLK); - mtk_clk_register_gates(node, img_clks, ARRAY_SIZE(img_clks), - clk_data); + mtk_clk_register_gates(NULL, node, img_clks, + ARRAY_SIZE(img_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); @@ -1080,8 +1080,8 @@ static void __init mtk_vdecsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VDEC_NR_CLK); - mtk_clk_register_gates(node, vdec_clks, ARRAY_SIZE(vdec_clks), - clk_data); + mtk_clk_register_gates(NULL, node, vdec_clks, + ARRAY_SIZE(vdec_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -1097,8 +1097,8 @@ static void __init mtk_vencsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VENC_NR_CLK); - mtk_clk_register_gates(node, venc_clks, ARRAY_SIZE(venc_clks), - clk_data); + mtk_clk_register_gates(NULL, node, venc_clks, + ARRAY_SIZE(venc_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -1114,8 +1114,8 @@ static void __init mtk_vencltsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_VENCLT_NR_CLK); - mtk_clk_register_gates(node, venclt_clks, ARRAY_SIZE(venclt_clks), - clk_data); + mtk_clk_register_gates(NULL, node, venclt_clks, + ARRAY_SIZE(venclt_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8183-audio.c b/drivers/clk/mediatek/clk-mt8183-audio.c index b2d7746eddbe..f358a6e7a340 100644 --- a/drivers/clk/mediatek/clk-mt8183-audio.c +++ b/drivers/clk/mediatek/clk-mt8183-audio.c @@ -75,8 +75,8 @@ static int clk_mt8183_audio_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_AUDIO_NR_CLK); - mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, audio_clks, + ARRAY_SIZE(audio_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8183-mm.c b/drivers/clk/mediatek/clk-mt8183-mm.c index 11ecc6fb0065..358031530913 100644 --- a/drivers/clk/mediatek/clk-mt8183-mm.c +++ b/drivers/clk/mediatek/clk-mt8183-mm.c @@ -90,8 +90,8 @@ static int clk_mt8183_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index 1860a35a723a..ba0d6ba10b35 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -1172,8 +1172,8 @@ static int clk_mt8183_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } @@ -1247,8 +1247,8 @@ static int clk_mt8183_top_probe(struct platform_device *pdev) mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), base, &mt8183_clk_lock, top_clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), - top_clk_data); + mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); ret = clk_mt8183_reg_mfg_mux_notifier(&pdev->dev, top_clk_data->hws[CLK_TOP_MUX_MFG]->clk); @@ -1267,8 +1267,8 @@ static int clk_mt8183_infra_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK); - mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) { @@ -1290,8 +1290,8 @@ static int clk_mt8183_peri_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK); - mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data); + mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt8186-mm.c b/drivers/clk/mediatek/clk-mt8186-mm.c index 1d33be407947..0b72607777fa 100644 --- a/drivers/clk/mediatek/clk-mt8186-mm.c +++ b/drivers/clk/mediatek/clk-mt8186-mm.c @@ -69,7 +69,8 @@ static int clk_mt8186_mm_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); if (r) goto free_mm_data; diff --git a/drivers/clk/mediatek/clk-mt8192-aud.c b/drivers/clk/mediatek/clk-mt8192-aud.c index 8c989bffd8c7..f524188fe4c2 100644 --- a/drivers/clk/mediatek/clk-mt8192-aud.c +++ b/drivers/clk/mediatek/clk-mt8192-aud.c @@ -87,7 +87,8 @@ static int clk_mt8192_aud_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, aud_clks, + ARRAY_SIZE(aud_clks), clk_data); if (r) return r; diff --git a/drivers/clk/mediatek/clk-mt8192-mm.c b/drivers/clk/mediatek/clk-mt8192-mm.c index 1be3ff4d407d..e9eb4cf8349a 100644 --- a/drivers/clk/mediatek/clk-mt8192-mm.c +++ b/drivers/clk/mediatek/clk-mt8192-mm.c @@ -91,7 +91,8 @@ static int clk_mt8192_mm_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); if (r) return r; diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index 508af9bbcc46..ac1eee513649 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1127,8 +1127,8 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_top_composites; - r = mtk_clk_register_gates_with_dev(node, top_clks, ARRAY_SIZE(top_clks), - top_clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); if (r) goto unregister_adj_divs_composites; @@ -1171,8 +1171,8 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates_with_dev(node, infra_clks, ARRAY_SIZE(infra_clks), - clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, infra_clks, + ARRAY_SIZE(infra_clks), clk_data); if (r) goto free_clk_data; @@ -1203,8 +1203,8 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates_with_dev(node, peri_clks, ARRAY_SIZE(peri_clks), - clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, peri_clks, + ARRAY_SIZE(peri_clks), clk_data); if (r) goto free_clk_data; @@ -1232,9 +1232,8 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) return -ENOMEM; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - r = mtk_clk_register_gates_with_dev(node, apmixed_clks, - ARRAY_SIZE(apmixed_clks), clk_data, - &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data); if (r) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8195-apmixedsys.c b/drivers/clk/mediatek/clk-mt8195-apmixedsys.c index 0dfed6ec4d15..1bc917f2667e 100644 --- a/drivers/clk/mediatek/clk-mt8195-apmixedsys.c +++ b/drivers/clk/mediatek/clk-mt8195-apmixedsys.c @@ -124,7 +124,8 @@ static int clk_mt8195_apmixed_probe(struct platform_device *pdev) if (r) goto free_apmixed_data; - r = mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, apmixed_clks, + ARRAY_SIZE(apmixed_clks), clk_data); if (r) goto unregister_plls; diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index 1e016329c1d2..e6e0298d6449 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1286,7 +1286,8 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) if (r) goto unregister_muxes; - r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, top_clks, + ARRAY_SIZE(top_clks), top_clk_data); if (r) goto unregister_composite_divs; diff --git a/drivers/clk/mediatek/clk-mt8195-vdo0.c b/drivers/clk/mediatek/clk-mt8195-vdo0.c index 07b46bfd5040..839b730688ac 100644 --- a/drivers/clk/mediatek/clk-mt8195-vdo0.c +++ b/drivers/clk/mediatek/clk-mt8195-vdo0.c @@ -104,7 +104,8 @@ static int clk_mt8195_vdo0_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, vdo0_clks, ARRAY_SIZE(vdo0_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, vdo0_clks, + ARRAY_SIZE(vdo0_clks), clk_data); if (r) goto free_vdo0_data; diff --git a/drivers/clk/mediatek/clk-mt8195-vdo1.c b/drivers/clk/mediatek/clk-mt8195-vdo1.c index 835335b9d87b..7df695b28925 100644 --- a/drivers/clk/mediatek/clk-mt8195-vdo1.c +++ b/drivers/clk/mediatek/clk-mt8195-vdo1.c @@ -131,7 +131,8 @@ static int clk_mt8195_vdo1_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(node, vdo1_clks, ARRAY_SIZE(vdo1_clks), clk_data); + r = mtk_clk_register_gates(&pdev->dev, node, vdo1_clks, + ARRAY_SIZE(vdo1_clks), clk_data); if (r) goto free_vdo1_data; diff --git a/drivers/clk/mediatek/clk-mt8365-mm.c b/drivers/clk/mediatek/clk-mt8365-mm.c index 5c8bf18ab1f1..22c75a03a645 100644 --- a/drivers/clk/mediatek/clk-mt8365-mm.c +++ b/drivers/clk/mediatek/clk-mt8365-mm.c @@ -81,9 +81,8 @@ static int clk_mt8365_mm_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK); - ret = mtk_clk_register_gates_with_dev(node, mm_clks, - ARRAY_SIZE(mm_clks), clk_data, - dev); + ret = mtk_clk_register_gates(dev, node, mm_clks, + ARRAY_SIZE(mm_clks), clk_data); if (ret) goto err_free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index adfecb618f10..b30cbeae1c3d 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -1019,8 +1019,8 @@ static int clk_mt8365_infra_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - ret = mtk_clk_register_gates(node, ifr_clks, ARRAY_SIZE(ifr_clks), - clk_data); + ret = mtk_clk_register_gates(&pdev->dev, node, ifr_clks, + ARRAY_SIZE(ifr_clks), clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8516-aud.c b/drivers/clk/mediatek/clk-mt8516-aud.c index a3dafc719799..a6ae8003b9ff 100644 --- a/drivers/clk/mediatek/clk-mt8516-aud.c +++ b/drivers/clk/mediatek/clk-mt8516-aud.c @@ -48,7 +48,7 @@ static void __init mtk_audsys_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_AUD_NR_CLK); - mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); + mtk_clk_register_gates(NULL, node, aud_clks, ARRAY_SIZE(aud_clks), clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8516.c b/drivers/clk/mediatek/clk-mt8516.c index 056953d594c6..bde0b8c761d4 100644 --- a/drivers/clk/mediatek/clk-mt8516.c +++ b/drivers/clk/mediatek/clk-mt8516.c @@ -655,7 +655,7 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_fixed_clks(fixed_clks, ARRAY_SIZE(fixed_clks), clk_data); - mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), clk_data); + mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index d31f01d0ba1c..6123b234d3c3 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -459,8 +459,8 @@ int mtk_clk_simple_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates_with_dev(node, mcd->clks, mcd->num_clks, - clk_data, &pdev->dev); + r = mtk_clk_register_gates(&pdev->dev, node, mcd->clks, mcd->num_clks, + clk_data); if (r) goto free_data; From 6f5f72a684a2823f21efbfd20c7e4b528c44a781 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:35 +0100 Subject: [PATCH 381/553] clk: mediatek: clk-mtk: Propagate struct device for composites [ Upstream commit 01a6c1ab57c3a474c8d23c7d82c3fcce85f62612 ] Like done for cpumux clocks, propagate struct device for composite clocks registered through clk-mtk helpers to be able to get runtime pm support for MTK clocks. Signed-off-by: AngeloGioacchino Del Regno Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-6-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt2701.c | 10 ++++++---- drivers/clk/mediatek/clk-mt2712.c | 12 ++++++++---- drivers/clk/mediatek/clk-mt6779.c | 10 ++++++---- drivers/clk/mediatek/clk-mt6795-pericfg.c | 3 ++- drivers/clk/mediatek/clk-mt6795-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt6797.c | 3 ++- drivers/clk/mediatek/clk-mt7622.c | 8 +++++--- drivers/clk/mediatek/clk-mt7629.c | 8 +++++--- drivers/clk/mediatek/clk-mt8135.c | 10 ++++++---- drivers/clk/mediatek/clk-mt8167.c | 10 ++++++---- drivers/clk/mediatek/clk-mt8173.c | 10 ++++++---- drivers/clk/mediatek/clk-mt8183.c | 15 +++++++++------ drivers/clk/mediatek/clk-mt8186-mcu.c | 3 ++- drivers/clk/mediatek/clk-mt8186-topckgen.c | 6 ++++-- drivers/clk/mediatek/clk-mt8192.c | 6 ++++-- drivers/clk/mediatek/clk-mt8195-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8365.c | 7 ++++--- drivers/clk/mediatek/clk-mt8516.c | 10 ++++++---- drivers/clk/mediatek/clk-mtk.c | 11 ++++++----- drivers/clk/mediatek/clk-mtk.h | 3 ++- 20 files changed, 93 insertions(+), 58 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt2701.c b/drivers/clk/mediatek/clk-mt2701.c index c7510f7ba4cc..e80fe9c942ee 100644 --- a/drivers/clk/mediatek/clk-mt2701.c +++ b/drivers/clk/mediatek/clk-mt2701.c @@ -679,8 +679,9 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_fixed_divs, ARRAY_SIZE(top_fixed_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), - base, &mt2701_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt2701_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2701_clk_lock, clk_data); @@ -905,8 +906,9 @@ static int mtk_pericfg_init(struct platform_device *pdev) mtk_clk_register_gates(&pdev->dev, node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); - mtk_clk_register_composites(peri_muxs, ARRAY_SIZE(peri_muxs), base, - &mt2701_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, peri_muxs, + ARRAY_SIZE(peri_muxs), base, + &mt2701_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt2712.c b/drivers/clk/mediatek/clk-mt2712.c index 78ebb4f2335c..a0f0c9ed48d1 100644 --- a/drivers/clk/mediatek/clk-mt2712.c +++ b/drivers/clk/mediatek/clk-mt2712.c @@ -1320,8 +1320,9 @@ static int clk_mt2712_top_probe(struct platform_device *pdev) mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt2712_clk_lock, top_clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt2712_clk_lock, top_clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt2712_clk_lock, top_clk_data); mtk_clk_register_gates(&pdev->dev, node, top_clks, @@ -1395,8 +1396,11 @@ static int clk_mt2712_mcu_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MCU_NR_CLK); - mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), base, - &mt2712_clk_lock, clk_data); + r = mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, + &mt2712_clk_lock, clk_data); + if (r) + dev_err(&pdev->dev, "Could not register composites: %d\n", r); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c index 5a396d2464ce..2c20e40d7c80 100644 --- a/drivers/clk/mediatek/clk-mt6779.c +++ b/drivers/clk/mediatek/clk-mt6779.c @@ -1251,11 +1251,13 @@ static int clk_mt6779_top_probe(struct platform_device *pdev) mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, &mt6779_clk_lock, clk_data); - mtk_clk_register_composites(top_aud_muxes, ARRAY_SIZE(top_aud_muxes), - base, &mt6779_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_muxes, + ARRAY_SIZE(top_aud_muxes), base, + &mt6779_clk_lock, clk_data); - mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), - base, &mt6779_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_divs, + ARRAY_SIZE(top_aud_divs), base, + &mt6779_clk_lock, clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt6795-pericfg.c b/drivers/clk/mediatek/clk-mt6795-pericfg.c index f69e715e0c1f..08aaa9b09c36 100644 --- a/drivers/clk/mediatek/clk-mt6795-pericfg.c +++ b/drivers/clk/mediatek/clk-mt6795-pericfg.c @@ -114,7 +114,8 @@ static int clk_mt6795_pericfg_probe(struct platform_device *pdev) if (ret) goto free_clk_data; - ret = mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, + ret = mtk_clk_register_composites(&pdev->dev, peri_clks, + ARRAY_SIZE(peri_clks), base, &mt6795_peri_clk_lock, clk_data); if (ret) goto unregister_gates; diff --git a/drivers/clk/mediatek/clk-mt6795-topckgen.c b/drivers/clk/mediatek/clk-mt6795-topckgen.c index 2948dd1aee8f..845cc8704930 100644 --- a/drivers/clk/mediatek/clk-mt6795-topckgen.c +++ b/drivers/clk/mediatek/clk-mt6795-topckgen.c @@ -557,7 +557,8 @@ static int clk_mt6795_topckgen_probe(struct platform_device *pdev) if (ret) goto unregister_factors; - ret = mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), base, + ret = mtk_clk_register_composites(&pdev->dev, top_aud_divs, + ARRAY_SIZE(top_aud_divs), base, &mt6795_top_clk_lock, clk_data); if (ret) goto unregister_muxes; diff --git a/drivers/clk/mediatek/clk-mt6797.c b/drivers/clk/mediatek/clk-mt6797.c index 29211744b173..0429a80f3cad 100644 --- a/drivers/clk/mediatek/clk-mt6797.c +++ b/drivers/clk/mediatek/clk-mt6797.c @@ -398,7 +398,8 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_fixed_divs, ARRAY_SIZE(top_fixed_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, &mt6797_clk_lock, clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7622.c b/drivers/clk/mediatek/clk-mt7622.c index bba88018f056..67a296646722 100644 --- a/drivers/clk/mediatek/clk-mt7622.c +++ b/drivers/clk/mediatek/clk-mt7622.c @@ -615,8 +615,9 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), - base, &mt7622_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt7622_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt7622_clk_lock, clk_data); @@ -685,7 +686,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) mtk_clk_register_gates(&pdev->dev, node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); - mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, + mtk_clk_register_composites(&pdev->dev, peri_muxes, + ARRAY_SIZE(peri_muxes), base, &mt7622_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt7629.c b/drivers/clk/mediatek/clk-mt7629.c index c0cdaf024296..2019e272d1cd 100644 --- a/drivers/clk/mediatek/clk-mt7629.c +++ b/drivers/clk/mediatek/clk-mt7629.c @@ -566,8 +566,9 @@ static int mtk_topckgen_init(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), - base, &mt7629_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt7629_clk_lock, clk_data); clk_prepare_enable(clk_data->hws[CLK_TOP_AXI_SEL]->clk); clk_prepare_enable(clk_data->hws[CLK_TOP_MEM_SEL]->clk); @@ -613,7 +614,8 @@ static int mtk_pericfg_init(struct platform_device *pdev) mtk_clk_register_gates(&pdev->dev, node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); - mtk_clk_register_composites(peri_muxes, ARRAY_SIZE(peri_muxes), base, + mtk_clk_register_composites(&pdev->dev, peri_muxes, + ARRAY_SIZE(peri_muxes), base, &mt7629_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/mediatek/clk-mt8135.c b/drivers/clk/mediatek/clk-mt8135.c index 8137cf225272..a39ad58e2741 100644 --- a/drivers/clk/mediatek/clk-mt8135.c +++ b/drivers/clk/mediatek/clk-mt8135.c @@ -536,8 +536,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8135_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8135_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -581,8 +582,9 @@ static void __init mtk_pericfg_init(struct device_node *node) mtk_clk_register_gates(NULL, node, peri_gates, ARRAY_SIZE(peri_gates), clk_data); - mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, - &mt8135_clk_lock, clk_data); + mtk_clk_register_composites(NULL, peri_clks, + ARRAY_SIZE(peri_clks), base, + &mt8135_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8167.c b/drivers/clk/mediatek/clk-mt8167.c index 59fe82ba5c7a..91669ebafaf9 100644 --- a/drivers/clk/mediatek/clk-mt8167.c +++ b/drivers/clk/mediatek/clk-mt8167.c @@ -940,8 +940,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8167_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8167_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8167_clk_lock, clk_data); @@ -966,8 +967,9 @@ static void __init mtk_infracfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IFR_NR_CLK); - mtk_clk_register_composites(ifr_muxes, ARRAY_SIZE(ifr_muxes), base, - &mt8167_clk_lock, clk_data); + mtk_clk_register_composites(NULL, ifr_muxes, + ARRAY_SIZE(ifr_muxes), base, + &mt8167_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index 74ed7dd129f4..d05c1109b4f8 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -869,8 +869,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_fixed_clks(fixed_clks, ARRAY_SIZE(fixed_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8173_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8173_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -920,8 +921,9 @@ static void __init mtk_pericfg_init(struct device_node *node) mtk_clk_register_gates(NULL, node, peri_gates, ARRAY_SIZE(peri_gates), clk_data); - mtk_clk_register_composites(peri_clks, ARRAY_SIZE(peri_clks), base, - &mt8173_clk_lock, clk_data); + mtk_clk_register_composites(NULL, peri_clks, + ARRAY_SIZE(peri_clks), base, + &mt8173_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index ba0d6ba10b35..bf7b34233253 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -1241,11 +1241,13 @@ static int clk_mt8183_top_probe(struct platform_device *pdev) mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, &mt8183_clk_lock, top_clk_data); - mtk_clk_register_composites(top_aud_muxes, ARRAY_SIZE(top_aud_muxes), - base, &mt8183_clk_lock, top_clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_muxes, + ARRAY_SIZE(top_aud_muxes), base, + &mt8183_clk_lock, top_clk_data); - mtk_clk_register_composites(top_aud_divs, ARRAY_SIZE(top_aud_divs), - base, &mt8183_clk_lock, top_clk_data); + mtk_clk_register_composites(&pdev->dev, top_aud_divs, + ARRAY_SIZE(top_aud_divs), base, + &mt8183_clk_lock, top_clk_data); mtk_clk_register_gates(&pdev->dev, node, top_clks, ARRAY_SIZE(top_clks), top_clk_data); @@ -1308,8 +1310,9 @@ static int clk_mt8183_mcu_probe(struct platform_device *pdev) clk_data = mtk_alloc_clk_data(CLK_MCU_NR_CLK); - mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), base, - &mt8183_clk_lock, clk_data); + mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, + &mt8183_clk_lock, clk_data); return of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); } diff --git a/drivers/clk/mediatek/clk-mt8186-mcu.c b/drivers/clk/mediatek/clk-mt8186-mcu.c index dfc305c1fc5d..e52a2d986c99 100644 --- a/drivers/clk/mediatek/clk-mt8186-mcu.c +++ b/drivers/clk/mediatek/clk-mt8186-mcu.c @@ -65,7 +65,8 @@ static int clk_mt8186_mcu_probe(struct platform_device *pdev) goto free_mcu_data; } - r = mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), base, + r = mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, NULL, clk_data); if (r) goto free_mcu_data; diff --git a/drivers/clk/mediatek/clk-mt8186-topckgen.c b/drivers/clk/mediatek/clk-mt8186-topckgen.c index d7f2c4663c85..4ac157320a6b 100644 --- a/drivers/clk/mediatek/clk-mt8186-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8186-topckgen.c @@ -720,12 +720,14 @@ static int clk_mt8186_topck_probe(struct platform_device *pdev) if (r) goto unregister_factors; - r = mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + r = mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, &mt8186_clk_lock, clk_data); if (r) goto unregister_muxes; - r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + r = mtk_clk_register_composites(&pdev->dev, top_adj_divs, + ARRAY_SIZE(top_adj_divs), base, &mt8186_clk_lock, clk_data); if (r) goto unregister_composite_muxes; diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index ac1eee513649..ab856d027618 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1117,12 +1117,14 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_factors; - r = mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, + r = mtk_clk_register_composites(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), base, &mt8192_clk_lock, top_clk_data); if (r) goto unregister_muxes; - r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + r = mtk_clk_register_composites(&pdev->dev, top_adj_divs, + ARRAY_SIZE(top_adj_divs), base, &mt8192_clk_lock, top_clk_data); if (r) goto unregister_top_composites; diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index e6e0298d6449..aae31ef3903d 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1281,7 +1281,8 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) if (r) goto unregister_muxes; - r = mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, + r = mtk_clk_register_composites(&pdev->dev, top_adj_divs, + ARRAY_SIZE(top_adj_divs), base, &mt8195_clk_lock, top_clk_data); if (r) goto unregister_muxes; diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index b30cbeae1c3d..0482a8aa43cc 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -952,7 +952,7 @@ static int clk_mt8365_top_probe(struct platform_device *pdev) if (ret) goto unregister_factors; - ret = mtk_clk_register_composites(top_misc_mux_gates, + ret = mtk_clk_register_composites(&pdev->dev, top_misc_mux_gates, ARRAY_SIZE(top_misc_mux_gates), base, &mt8365_clk_lock, clk_data); if (ret) @@ -1080,8 +1080,9 @@ static int clk_mt8365_mcu_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; - ret = mtk_clk_register_composites(mcu_muxes, ARRAY_SIZE(mcu_muxes), - base, &mt8365_clk_lock, clk_data); + ret = mtk_clk_register_composites(&pdev->dev, mcu_muxes, + ARRAY_SIZE(mcu_muxes), base, + &mt8365_clk_lock, clk_data); if (ret) goto free_clk_data; diff --git a/drivers/clk/mediatek/clk-mt8516.c b/drivers/clk/mediatek/clk-mt8516.c index bde0b8c761d4..6983d3a48dc9 100644 --- a/drivers/clk/mediatek/clk-mt8516.c +++ b/drivers/clk/mediatek/clk-mt8516.c @@ -658,8 +658,9 @@ static void __init mtk_topckgen_init(struct device_node *node) mtk_clk_register_gates(NULL, node, top_clks, ARRAY_SIZE(top_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, - &mt8516_clk_lock, clk_data); + mtk_clk_register_composites(NULL, top_muxes, + ARRAY_SIZE(top_muxes), base, + &mt8516_clk_lock, clk_data); mtk_clk_register_dividers(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8516_clk_lock, clk_data); @@ -684,8 +685,9 @@ static void __init mtk_infracfg_init(struct device_node *node) clk_data = mtk_alloc_clk_data(CLK_IFR_NR_CLK); - mtk_clk_register_composites(ifr_muxes, ARRAY_SIZE(ifr_muxes), base, - &mt8516_clk_lock, clk_data); + mtk_clk_register_composites(NULL, ifr_muxes, + ARRAY_SIZE(ifr_muxes), base, + &mt8516_clk_lock, clk_data); r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index 6123b234d3c3..152f3d906ef8 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -197,8 +197,8 @@ void mtk_clk_unregister_factors(const struct mtk_fixed_factor *clks, int num, } EXPORT_SYMBOL_GPL(mtk_clk_unregister_factors); -static struct clk_hw *mtk_clk_register_composite(const struct mtk_composite *mc, - void __iomem *base, spinlock_t *lock) +static struct clk_hw *mtk_clk_register_composite(struct device *dev, + const struct mtk_composite *mc, void __iomem *base, spinlock_t *lock) { struct clk_hw *hw; struct clk_mux *mux = NULL; @@ -264,7 +264,7 @@ static struct clk_hw *mtk_clk_register_composite(const struct mtk_composite *mc, div_ops = &clk_divider_ops; } - hw = clk_hw_register_composite(NULL, mc->name, parent_names, num_parents, + hw = clk_hw_register_composite(dev, mc->name, parent_names, num_parents, mux_hw, mux_ops, div_hw, div_ops, gate_hw, gate_ops, @@ -308,7 +308,8 @@ static void mtk_clk_unregister_composite(struct clk_hw *hw) kfree(mux); } -int mtk_clk_register_composites(const struct mtk_composite *mcs, int num, +int mtk_clk_register_composites(struct device *dev, + const struct mtk_composite *mcs, int num, void __iomem *base, spinlock_t *lock, struct clk_hw_onecell_data *clk_data) { @@ -327,7 +328,7 @@ int mtk_clk_register_composites(const struct mtk_composite *mcs, int num, continue; } - hw = mtk_clk_register_composite(mc, base, lock); + hw = mtk_clk_register_composite(dev, mc, base, lock); if (IS_ERR(hw)) { pr_err("Failed to register clk %s: %pe\n", mc->name, diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 63ae7941aa92..3993a60738c7 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -149,7 +149,8 @@ struct mtk_composite { .flags = 0, \ } -int mtk_clk_register_composites(const struct mtk_composite *mcs, int num, +int mtk_clk_register_composites(struct device *dev, + const struct mtk_composite *mcs, int num, void __iomem *base, spinlock_t *lock, struct clk_hw_onecell_data *clk_data); void mtk_clk_unregister_composites(const struct mtk_composite *mcs, int num, From a4fe8813a7868ba5867e42e60de7a2b8baac30ff Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:36 +0100 Subject: [PATCH 382/553] clk: mediatek: clk-mux: Propagate struct device for mtk-mux [ Upstream commit d3d6bd5e25cdc460df33ae1db4f051c4bdd3aa60 ] Like done for other clocks, propagate struct device for mtk mux clocks registered through clk-mux helpers to enable runtime pm support. Signed-off-by: AngeloGioacchino Del Regno Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-7-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mt6765.c | 3 ++- drivers/clk/mediatek/clk-mt6779.c | 5 +++-- drivers/clk/mediatek/clk-mt6795-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt7986-infracfg.c | 3 ++- drivers/clk/mediatek/clk-mt7986-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8183.c | 5 +++-- drivers/clk/mediatek/clk-mt8186-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8192.c | 3 ++- drivers/clk/mediatek/clk-mt8195-topckgen.c | 3 ++- drivers/clk/mediatek/clk-mt8365.c | 3 ++- drivers/clk/mediatek/clk-mux.c | 14 ++++++++------ drivers/clk/mediatek/clk-mux.h | 3 ++- 12 files changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt6765.c b/drivers/clk/mediatek/clk-mt6765.c index 4a7bc6e04580..c4941523f552 100644 --- a/drivers/clk/mediatek/clk-mt6765.c +++ b/drivers/clk/mediatek/clk-mt6765.c @@ -782,7 +782,8 @@ static int clk_mt6765_top_probe(struct platform_device *pdev) clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt6765_clk_lock, clk_data); mtk_clk_register_gates(&pdev->dev, node, top_clks, ARRAY_SIZE(top_clks), clk_data); diff --git a/drivers/clk/mediatek/clk-mt6779.c b/drivers/clk/mediatek/clk-mt6779.c index 2c20e40d7c80..7fe9d12b2dfd 100644 --- a/drivers/clk/mediatek/clk-mt6779.c +++ b/drivers/clk/mediatek/clk-mt6779.c @@ -1248,8 +1248,9 @@ static int clk_mt6779_top_probe(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), - node, &mt6779_clk_lock, clk_data); + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, + &mt6779_clk_lock, clk_data); mtk_clk_register_composites(&pdev->dev, top_aud_muxes, ARRAY_SIZE(top_aud_muxes), base, diff --git a/drivers/clk/mediatek/clk-mt6795-topckgen.c b/drivers/clk/mediatek/clk-mt6795-topckgen.c index 845cc8704930..2ab8bf5d6d6d 100644 --- a/drivers/clk/mediatek/clk-mt6795-topckgen.c +++ b/drivers/clk/mediatek/clk-mt6795-topckgen.c @@ -552,7 +552,8 @@ static int clk_mt6795_topckgen_probe(struct platform_device *pdev) if (ret) goto unregister_fixed_clks; - ret = mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + ret = mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt6795_top_clk_lock, clk_data); if (ret) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt7986-infracfg.c b/drivers/clk/mediatek/clk-mt7986-infracfg.c index 578f150e0ee5..0a4bf87ee160 100644 --- a/drivers/clk/mediatek/clk-mt7986-infracfg.c +++ b/drivers/clk/mediatek/clk-mt7986-infracfg.c @@ -178,7 +178,8 @@ static int clk_mt7986_infracfg_probe(struct platform_device *pdev) return -ENOMEM; mtk_clk_register_factors(infra_divs, ARRAY_SIZE(infra_divs), clk_data); - mtk_clk_register_muxes(infra_muxes, ARRAY_SIZE(infra_muxes), node, + mtk_clk_register_muxes(&pdev->dev, infra_muxes, + ARRAY_SIZE(infra_muxes), node, &mt7986_clk_lock, clk_data); mtk_clk_register_gates(&pdev->dev, node, infra_clks, ARRAY_SIZE(infra_clks), clk_data); diff --git a/drivers/clk/mediatek/clk-mt7986-topckgen.c b/drivers/clk/mediatek/clk-mt7986-topckgen.c index de5121cf2877..c9bf47e6098f 100644 --- a/drivers/clk/mediatek/clk-mt7986-topckgen.c +++ b/drivers/clk/mediatek/clk-mt7986-topckgen.c @@ -303,7 +303,8 @@ static int clk_mt7986_topckgen_probe(struct platform_device *pdev) mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt7986_clk_lock, clk_data); clk_prepare_enable(clk_data->hws[CLK_TOP_SYSAXI_SEL]->clk); diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c index bf7b34233253..78620244144e 100644 --- a/drivers/clk/mediatek/clk-mt8183.c +++ b/drivers/clk/mediatek/clk-mt8183.c @@ -1238,8 +1238,9 @@ static int clk_mt8183_top_probe(struct platform_device *pdev) mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data); - mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), - node, &mt8183_clk_lock, top_clk_data); + mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, + &mt8183_clk_lock, top_clk_data); mtk_clk_register_composites(&pdev->dev, top_aud_muxes, ARRAY_SIZE(top_aud_muxes), base, diff --git a/drivers/clk/mediatek/clk-mt8186-topckgen.c b/drivers/clk/mediatek/clk-mt8186-topckgen.c index 4ac157320a6b..70b6e008a188 100644 --- a/drivers/clk/mediatek/clk-mt8186-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8186-topckgen.c @@ -715,7 +715,8 @@ static int clk_mt8186_topck_probe(struct platform_device *pdev) if (r) goto unregister_fixed_clks; - r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + r = mtk_clk_register_muxes(&pdev->dev, top_mtk_muxes, + ARRAY_SIZE(top_mtk_muxes), node, &mt8186_clk_lock, clk_data); if (r) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index ab856d027618..16feb86dcb1b 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1112,7 +1112,8 @@ static int clk_mt8192_top_probe(struct platform_device *pdev) if (r) goto unregister_early_factors; - r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + r = mtk_clk_register_muxes(&pdev->dev, top_mtk_muxes, + ARRAY_SIZE(top_mtk_muxes), node, &mt8192_clk_lock, top_clk_data); if (r) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index aae31ef3903d..3485ebb17ab8 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1262,7 +1262,8 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) if (r) goto unregister_fixed_clks; - r = mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, + r = mtk_clk_register_muxes(&pdev->dev, top_mtk_muxes, + ARRAY_SIZE(top_mtk_muxes), node, &mt8195_clk_lock, top_clk_data); if (r) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index 0482a8aa43cc..c9faa07ec0a6 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -947,7 +947,8 @@ static int clk_mt8365_top_probe(struct platform_device *pdev) if (ret) goto unregister_fixed_clks; - ret = mtk_clk_register_muxes(top_muxes, ARRAY_SIZE(top_muxes), node, + ret = mtk_clk_register_muxes(&pdev->dev, top_muxes, + ARRAY_SIZE(top_muxes), node, &mt8365_clk_lock, clk_data); if (ret) goto unregister_factors; diff --git a/drivers/clk/mediatek/clk-mux.c b/drivers/clk/mediatek/clk-mux.c index ba1720b9e231..c8593554239d 100644 --- a/drivers/clk/mediatek/clk-mux.c +++ b/drivers/clk/mediatek/clk-mux.c @@ -154,9 +154,10 @@ const struct clk_ops mtk_mux_gate_clr_set_upd_ops = { }; EXPORT_SYMBOL_GPL(mtk_mux_gate_clr_set_upd_ops); -static struct clk_hw *mtk_clk_register_mux(const struct mtk_mux *mux, - struct regmap *regmap, - spinlock_t *lock) +static struct clk_hw *mtk_clk_register_mux(struct device *dev, + const struct mtk_mux *mux, + struct regmap *regmap, + spinlock_t *lock) { struct mtk_clk_mux *clk_mux; struct clk_init_data init = {}; @@ -177,7 +178,7 @@ static struct clk_hw *mtk_clk_register_mux(const struct mtk_mux *mux, clk_mux->lock = lock; clk_mux->hw.init = &init; - ret = clk_hw_register(NULL, &clk_mux->hw); + ret = clk_hw_register(dev, &clk_mux->hw); if (ret) { kfree(clk_mux); return ERR_PTR(ret); @@ -198,7 +199,8 @@ static void mtk_clk_unregister_mux(struct clk_hw *hw) kfree(mux); } -int mtk_clk_register_muxes(const struct mtk_mux *muxes, +int mtk_clk_register_muxes(struct device *dev, + const struct mtk_mux *muxes, int num, struct device_node *node, spinlock_t *lock, struct clk_hw_onecell_data *clk_data) @@ -222,7 +224,7 @@ int mtk_clk_register_muxes(const struct mtk_mux *muxes, continue; } - hw = mtk_clk_register_mux(mux, regmap, lock); + hw = mtk_clk_register_mux(dev, mux, regmap, lock); if (IS_ERR(hw)) { pr_err("Failed to register clk %s: %pe\n", mux->name, diff --git a/drivers/clk/mediatek/clk-mux.h b/drivers/clk/mediatek/clk-mux.h index 83ff420f4ebe..7ecb963b0ec6 100644 --- a/drivers/clk/mediatek/clk-mux.h +++ b/drivers/clk/mediatek/clk-mux.h @@ -83,7 +83,8 @@ extern const struct clk_ops mtk_mux_gate_clr_set_upd_ops; 0, _upd_ofs, _upd, CLK_SET_RATE_PARENT, \ mtk_mux_clr_set_upd_ops) -int mtk_clk_register_muxes(const struct mtk_mux *muxes, +int mtk_clk_register_muxes(struct device *dev, + const struct mtk_mux *muxes, int num, struct device_node *node, spinlock_t *lock, struct clk_hw_onecell_data *clk_data); From c1d87d56af063c87961511ee25f6b07a5676d27d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 20 Jan 2023 10:20:42 +0100 Subject: [PATCH 383/553] clk: mediatek: clk-mtk: Extend mtk_clk_simple_probe() [ Upstream commit 7b6183108c8ccf0dc295f39cdf78bd8078455636 ] As a preparation to increase probe functions commonization across various MediaTek SoC clock controller drivers, extend function mtk_clk_simple_probe() to be able to register not only gates, but also fixed clocks, factors, muxes and composites. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Miles Chen Reviewed-by: Chen-Yu Tsai Tested-by: Miles Chen Link: https://lore.kernel.org/r/20230120092053.182923-13-angelogioacchino.delregno@collabora.com Tested-by: Mingming Su Signed-off-by: Stephen Boyd Stable-dep-of: 2f7b1d8b5505 ("clk: mediatek: Do a runtime PM get on controllers during probe") Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mtk.c | 101 ++++++++++++++++++++++++++++++--- drivers/clk/mediatek/clk-mtk.h | 10 ++++ 2 files changed, 103 insertions(+), 8 deletions(-) diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index 152f3d906ef8..bfabd94a474a 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -11,12 +11,14 @@ #include #include #include +#include #include #include #include #include "clk-mtk.h" #include "clk-gate.h" +#include "clk-mux.h" static void mtk_init_clk_data(struct clk_hw_onecell_data *clk_data, unsigned int clk_num) @@ -450,20 +452,71 @@ int mtk_clk_simple_probe(struct platform_device *pdev) const struct mtk_clk_desc *mcd; struct clk_hw_onecell_data *clk_data; struct device_node *node = pdev->dev.of_node; - int r; + void __iomem *base; + int num_clks, r; mcd = of_device_get_match_data(&pdev->dev); if (!mcd) return -EINVAL; - clk_data = mtk_alloc_clk_data(mcd->num_clks); + /* Composite clocks needs us to pass iomem pointer */ + if (mcd->composite_clks) { + if (!mcd->shared_io) + base = devm_platform_ioremap_resource(pdev, 0); + else + base = of_iomap(node, 0); + + if (IS_ERR_OR_NULL(base)) + return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM; + } + + /* Calculate how many clk_hw_onecell_data entries to allocate */ + num_clks = mcd->num_clks + mcd->num_composite_clks; + num_clks += mcd->num_fixed_clks + mcd->num_factor_clks; + num_clks += mcd->num_mux_clks; + + clk_data = mtk_alloc_clk_data(num_clks); if (!clk_data) return -ENOMEM; - r = mtk_clk_register_gates(&pdev->dev, node, mcd->clks, mcd->num_clks, - clk_data); - if (r) - goto free_data; + if (mcd->fixed_clks) { + r = mtk_clk_register_fixed_clks(mcd->fixed_clks, + mcd->num_fixed_clks, clk_data); + if (r) + goto free_data; + } + + if (mcd->factor_clks) { + r = mtk_clk_register_factors(mcd->factor_clks, + mcd->num_factor_clks, clk_data); + if (r) + goto unregister_fixed_clks; + } + + if (mcd->mux_clks) { + r = mtk_clk_register_muxes(&pdev->dev, mcd->mux_clks, + mcd->num_mux_clks, node, + mcd->clk_lock, clk_data); + if (r) + goto unregister_factors; + }; + + if (mcd->composite_clks) { + /* We don't check composite_lock because it's optional */ + r = mtk_clk_register_composites(&pdev->dev, + mcd->composite_clks, + mcd->num_composite_clks, + base, mcd->clk_lock, clk_data); + if (r) + goto unregister_muxes; + } + + if (mcd->clks) { + r = mtk_clk_register_gates(&pdev->dev, node, mcd->clks, + mcd->num_clks, clk_data); + if (r) + goto unregister_composites; + } r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); if (r) @@ -481,9 +534,28 @@ int mtk_clk_simple_probe(struct platform_device *pdev) return r; unregister_clks: - mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); + if (mcd->clks) + mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); +unregister_composites: + if (mcd->composite_clks) + mtk_clk_unregister_composites(mcd->composite_clks, + mcd->num_composite_clks, clk_data); +unregister_muxes: + if (mcd->mux_clks) + mtk_clk_unregister_muxes(mcd->mux_clks, + mcd->num_mux_clks, clk_data); +unregister_factors: + if (mcd->factor_clks) + mtk_clk_unregister_factors(mcd->factor_clks, + mcd->num_factor_clks, clk_data); +unregister_fixed_clks: + if (mcd->fixed_clks) + mtk_clk_unregister_fixed_clks(mcd->fixed_clks, + mcd->num_fixed_clks, clk_data); free_data: mtk_free_clk_data(clk_data); + if (mcd->shared_io && base) + iounmap(base); return r; } EXPORT_SYMBOL_GPL(mtk_clk_simple_probe); @@ -495,7 +567,20 @@ int mtk_clk_simple_remove(struct platform_device *pdev) struct device_node *node = pdev->dev.of_node; of_clk_del_provider(node); - mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); + if (mcd->clks) + mtk_clk_unregister_gates(mcd->clks, mcd->num_clks, clk_data); + if (mcd->composite_clks) + mtk_clk_unregister_composites(mcd->composite_clks, + mcd->num_composite_clks, clk_data); + if (mcd->mux_clks) + mtk_clk_unregister_muxes(mcd->mux_clks, + mcd->num_mux_clks, clk_data); + if (mcd->factor_clks) + mtk_clk_unregister_factors(mcd->factor_clks, + mcd->num_factor_clks, clk_data); + if (mcd->fixed_clks) + mtk_clk_unregister_fixed_clks(mcd->fixed_clks, + mcd->num_fixed_clks, clk_data); mtk_free_clk_data(clk_data); return 0; diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 3993a60738c7..880b3d6d8011 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -196,7 +196,17 @@ void mtk_clk_unregister_ref2usb_tx(struct clk_hw *hw); struct mtk_clk_desc { const struct mtk_gate *clks; size_t num_clks; + const struct mtk_composite *composite_clks; + size_t num_composite_clks; + const struct mtk_fixed_clk *fixed_clks; + size_t num_fixed_clks; + const struct mtk_fixed_factor *factor_clks; + size_t num_factor_clks; + const struct mtk_mux *mux_clks; + size_t num_mux_clks; const struct mtk_clk_rst_desc *rst_desc; + spinlock_t *clk_lock; + bool shared_io; }; int mtk_clk_simple_probe(struct platform_device *pdev); From 165d226472575b213dd90dfda19d1605dd7c19a8 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Tue, 12 Mar 2024 19:51:55 +0800 Subject: [PATCH 384/553] clk: mediatek: Do a runtime PM get on controllers during probe [ Upstream commit 2f7b1d8b5505efb0057cd1ab85fca206063ea4c3 ] mt8183-mfgcfg has a mutual dependency with genpd during the probing stage, which leads to a deadlock in the following call stack: CPU0: genpd_lock --> clk_prepare_lock genpd_power_off_work_fn() genpd_lock() generic_pm_domain::power_off() clk_unprepare() clk_prepare_lock() CPU1: clk_prepare_lock --> genpd_lock clk_register() __clk_core_init() clk_prepare_lock() clk_pm_runtime_get() genpd_lock() Do a runtime PM get at the probe function to make sure clk_register() won't acquire the genpd lock. Instead of only modifying mt8183-mfgcfg, do this on all mediatek clock controller probings because we don't believe this would cause any regression. Verified on MT8183 and MT8192 Chromebooks. Fixes: acddfc2c261b ("clk: mediatek: Add MT8183 clock support") Signed-off-by: Pin-yen Lin Link: https://lore.kernel.org/r/20240312115249.3341654-1-treapking@chromium.org Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mediatek/clk-mtk.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index bfabd94a474a..fa2c1b1c7dee 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "clk-mtk.h" @@ -470,6 +471,16 @@ int mtk_clk_simple_probe(struct platform_device *pdev) return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM; } + + devm_pm_runtime_enable(&pdev->dev); + /* + * Do a pm_runtime_resume_and_get() to workaround a possible + * deadlock between clk_register() and the genpd framework. + */ + r = pm_runtime_resume_and_get(&pdev->dev); + if (r) + return r; + /* Calculate how many clk_hw_onecell_data entries to allocate */ num_clks = mcd->num_clks + mcd->num_composite_clks; num_clks += mcd->num_fixed_clks + mcd->num_factor_clks; @@ -531,6 +542,8 @@ int mtk_clk_simple_probe(struct platform_device *pdev) goto unregister_clks; } + pm_runtime_put(&pdev->dev); + return r; unregister_clks: @@ -556,6 +569,8 @@ free_data: mtk_free_clk_data(clk_data); if (mcd->shared_io && base) iounmap(base); + + pm_runtime_put(&pdev->dev); return r; } EXPORT_SYMBOL_GPL(mtk_clk_simple_probe); From d17075a93501062fb9f9582c899be455221c5633 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2024 11:10:33 -0700 Subject: [PATCH 385/553] x86/bugs: Fix BHI retpoline check [ Upstream commit 69129794d94c544810e68b2b4eaa7e44063f9bf2 ] Confusingly, X86_FEATURE_RETPOLINE doesn't mean retpolines are enabled, as it also includes the original "AMD retpoline" which isn't a retpoline at all. Also replace cpu_feature_enabled() with boot_cpu_has() because this is before alternatives are patched and cpu_feature_enabled()'s fallback path is slower than plain old boot_cpu_has(). Fixes: ec9404e40e8f ("x86/bhi: Add BHI mitigation knob") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Pawan Gupta Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lore.kernel.org/r/ad3807424a3953f0323c011a643405619f2a4927.1712944776.git.jpoimboe@kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/bugs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6d69123de366..3f38592ec771 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1629,7 +1629,8 @@ static void __init bhi_select_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { spec_ctrl_disable_kernel_rrsba(); if (rrsba_disabled) return; @@ -2783,11 +2784,13 @@ static const char *spectre_bhi_state(void) { if (!boot_cpu_has_bug(X86_BUG_BHI)) return "; BHI: Not affected"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) return "; BHI: BHI_DIS_S"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; - else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled) + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) return "; BHI: Vulnerable, KVM: SW loop"; From 943c3e45c80a8356a683bc54a1004d7efd8949f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 16 Apr 2024 23:04:34 -0700 Subject: [PATCH 386/553] x86/cpufeatures: Fix dependencies for GFNI, VAES, and VPCLMULQDQ [ Upstream commit 9543f6e26634537997b6e909c20911b7bf4876de ] Fix cpuid_deps[] to list the correct dependencies for GFNI, VAES, and VPCLMULQDQ. These features don't depend on AVX512, and there exist CPUs that support these features but not AVX512. GFNI actually doesn't even depend on AVX. This prevents GFNI from being unnecessarily disabled if AVX is disabled to mitigate the GDS vulnerability. This also prevents all three features from being unnecessarily disabled if AVX512VL (or its dependency AVX512F) were to be disabled, but it looks like there isn't any case where this happens anyway. Fixes: c128dbfa0f87 ("x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features") Signed-off-by: Eric Biggers Signed-off-by: Borislav Petkov (AMD) Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20240417060434.47101-1-ebiggers@kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/cpuid-deps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index c881bcafba7d..9c19f40b1b27 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, From d05380576fb42adb427b1d5dd11f09fe33833b1a Mon Sep 17 00:00:00 2001 From: Ai Chao Date: Fri, 19 Apr 2024 16:21:59 +0800 Subject: [PATCH 387/553] ALSA: hda/realtek - Enable audio jacks of Haier Boyue G42 with ALC269VC commit 7ee5faad0f8c3ad86c8cfc2f6aac91d2ba29790f upstream. The Haier Boyue G42 with ALC269VC cannot detect the MIC of headset, the line out and internal speaker until ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS quirk applied. Signed-off-by: Ai Chao Cc: Message-ID: <20240419082159.476879-1-aichao@kylinos.cn> Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e8cf38dc8a5e..77c40063d63a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10122,6 +10122,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), From a6d2a8b211c874971ee4cf3ddd167408177f6e76 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 30 Mar 2024 19:01:14 +0000 Subject: [PATCH 388/553] binder: check offset alignment in binder_get_object() commit aaef73821a3b0194a01bd23ca77774f704a04d40 upstream. Commit 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") introduced changes to how binder objects are copied. In doing so, it unintentionally removed an offset alignment check done through calls to binder_alloc_copy_from_buffer() -> check_buffer(). These calls were replaced in binder_get_object() with copy_from_user(), so now an explicit offset alignment check is needed here. This avoids later complications when unwinding the objects gets harder. It is worth noting this check existed prior to commit 7a67a39320df ("binder: add function to copy binder object from buffer"), likely removed due to redundancy at the time. Fixes: 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") Cc: stable@vger.kernel.org Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20240330190115.1877819-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 55cd17a13e75..8c2b7c074eca 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1707,8 +1707,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; From 38e10c9faa95ef37c927fdb19dbdc97153ce6150 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Fri, 1 Mar 2024 15:11:18 +0200 Subject: [PATCH 389/553] thunderbolt: Avoid notify PM core about runtime PM resume commit dcd12acaf384c30437fa5a9a1f71df06fc9835fd upstream. Currently we notify PM core about occurred wakes after any resume. This is not actually needed after resume from runtime suspend. Hence, notify PM core about occurred wakes only after resume from system sleep. Also, if the wake occurred in USB4 router upstream port, we don't notify the PM core about it since it is not actually needed and can cause unexpected autowake (e.g. if /sys/power/wakeup_count is used). While there add the missing kernel-doc for tb_switch_resume(). Signed-off-by: Gil Fine Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 27 +++++++++++++++++++++++++-- drivers/thunderbolt/tb.c | 4 ++-- drivers/thunderbolt/tb.h | 3 ++- drivers/thunderbolt/usb4.c | 13 +++++++------ 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 55698a0978f0..b4c01bc7120a 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3135,7 +3135,26 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags) return tb_lc_set_wake(sw, flags); } -int tb_switch_resume(struct tb_switch *sw) +static void tb_switch_check_wakes(struct tb_switch *sw) +{ + if (device_may_wakeup(&sw->dev)) { + if (tb_switch_is_usb4(sw)) + usb4_switch_check_wakes(sw); + } +} + +/** + * tb_switch_resume() - Resume a switch after sleep + * @sw: Switch to resume + * @runtime: Is this resume from runtime suspend or system sleep + * + * Resumes and re-enumerates router (and all its children), if still plugged + * after suspend. Don't enumerate device router whose UID was changed during + * suspend. If this is resume from system sleep, notifies PM core about the + * wakes occurred during suspend. Disables all wakes, except USB4 wake of + * upstream port for USB4 routers that shall be always enabled. + */ +int tb_switch_resume(struct tb_switch *sw, bool runtime) { struct tb_port *port; int err; @@ -3184,6 +3203,9 @@ int tb_switch_resume(struct tb_switch *sw) if (err) return err; + if (!runtime) + tb_switch_check_wakes(sw); + /* Disable wakes */ tb_switch_set_wake(sw, 0); @@ -3213,7 +3235,8 @@ int tb_switch_resume(struct tb_switch *sw) */ if (tb_port_unlock(port)) tb_port_warn(port, "failed to unlock port\n"); - if (port->remote && tb_switch_resume(port->remote->sw)) { + if (port->remote && + tb_switch_resume(port->remote->sw, runtime)) { tb_port_warn(port, "lost during suspend, disconnecting\n"); tb_sw_set_unplugged(port->remote->sw); diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index e83269dc2b06..c5e4fa478e64 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1628,7 +1628,7 @@ static int tb_resume_noirq(struct tb *tb) /* remove any pci devices the firmware might have setup */ tb_switch_reset(tb->root_switch); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, false); tb_free_invalid_tunnels(tb); tb_free_unplugged_children(tb->root_switch); tb_restore_children(tb->root_switch); @@ -1754,7 +1754,7 @@ static int tb_runtime_resume(struct tb *tb) struct tb_tunnel *tunnel, *n; mutex_lock(&tb->lock); - tb_switch_resume(tb->root_switch); + tb_switch_resume(tb->root_switch, true); tb_free_invalid_tunnels(tb); tb_restore_children(tb->root_switch); list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index b3fec5f8e20c..acf5b8620845 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -787,7 +787,7 @@ int tb_switch_configure(struct tb_switch *sw); int tb_switch_add(struct tb_switch *sw); void tb_switch_remove(struct tb_switch *sw); void tb_switch_suspend(struct tb_switch *sw, bool runtime); -int tb_switch_resume(struct tb_switch *sw); +int tb_switch_resume(struct tb_switch *sw, bool runtime); int tb_switch_reset(struct tb_switch *sw); int tb_switch_wait_for_bit(struct tb_switch *sw, u32 offset, u32 bit, u32 value, int timeout_msec); @@ -1182,6 +1182,7 @@ static inline struct tb_retimer *tb_to_retimer(struct device *dev) return NULL; } +void usb4_switch_check_wakes(struct tb_switch *sw); int usb4_switch_setup(struct tb_switch *sw); int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid); int usb4_switch_drom_read(struct tb_switch *sw, unsigned int address, void *buf, diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 3c821f5e4481..b0394ba6d111 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -153,15 +153,18 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode, tx_dwords, rx_data, rx_dwords); } -static void usb4_switch_check_wakes(struct tb_switch *sw) +/** + * usb4_switch_check_wakes() - Check for wakes and notify PM core about them + * @sw: Router whose wakes to check + * + * Checks wakes occurred during suspend and notify the PM core about them. + */ +void usb4_switch_check_wakes(struct tb_switch *sw) { struct tb_port *port; bool wakeup = false; u32 val; - if (!device_may_wakeup(&sw->dev)) - return; - if (tb_route(sw)) { if (tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_6, 1)) return; @@ -226,8 +229,6 @@ int usb4_switch_setup(struct tb_switch *sw) u32 val = 0; int ret; - usb4_switch_check_wakes(sw); - if (!tb_route(sw)) return 0; From 9eae1facfc061802e045a49ae83ac1f0ff9900a3 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Fri, 1 Mar 2024 15:22:53 +0200 Subject: [PATCH 390/553] thunderbolt: Fix wake configurations after device unplug commit c38fa07dc69f0b9e6f43ecab96dc7861a70c827c upstream. Currently we don't configure correctly the wake events after unplug of device router. What can happen is that the downstream ports of host router will be configured to wake on: USB4-wake and wake-on-disconnect, but not on wake-on-connect. This may cause the later plugged device not to wake the domain and fail in enumeration. Fix this by clearing downstream port's "USB4 Port is Configured" bit, after unplug of a device router. Signed-off-by: Gil Fine Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index b4c01bc7120a..d3058ede5306 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2880,22 +2880,29 @@ void tb_switch_unconfigure_link(struct tb_switch *sw) { struct tb_port *up, *down; - if (sw->is_unplugged) - return; if (!tb_route(sw) || tb_switch_is_icm(sw)) return; + /* + * Unconfigure downstream port so that wake-on-connect can be + * configured after router unplug. No need to unconfigure upstream port + * since its router is unplugged. + */ + up = tb_upstream_port(sw); + down = up->remote; + if (tb_switch_is_usb4(down->sw)) + usb4_port_unconfigure(down); + else + tb_lc_unconfigure_port(down); + + if (sw->is_unplugged) + return; + up = tb_upstream_port(sw); if (tb_switch_is_usb4(up->sw)) usb4_port_unconfigure(up); else tb_lc_unconfigure_port(up); - - down = up->remote; - if (tb_switch_is_usb4(down->sw)) - usb4_port_unconfigure(down); - else - tb_lc_unconfigure_port(down); } static void tb_switch_credits_init(struct tb_switch *sw) From ac882d6b21bffecb57bcc4486701239eef5aa67b Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 8 Apr 2024 10:16:33 -0700 Subject: [PATCH 391/553] comedi: vmk80xx: fix incomplete endpoint checking commit d1718530e3f640b7d5f0050e725216eab57a85d8 upstream. While vmk80xx does have endpoint checking implemented, some things can fall through the cracks. Depending on the hardware model, URBs can have either bulk or interrupt type, and current version of vmk80xx_find_usb_endpoints() function does not take that fully into account. While this warning does not seem to be too harmful, at the very least it will crash systems with 'panic_on_warn' set on them. Fix the issue found by Syzkaller [1] by somewhat simplifying the endpoint checking process with usb_find_common_endpoints() and ensuring that only expected endpoint types are present. This patch has not been tested on real hardware. [1] Syzkaller report: usb 1-1: BOGUS urb xfer, pipe 1 != type 3 WARNING: CPU: 0 PID: 781 at drivers/usb/core/urb.c:504 usb_submit_urb+0xc4e/0x18c0 drivers/usb/core/urb.c:503 ... Call Trace: usb_start_wait_urb+0x113/0x520 drivers/usb/core/message.c:59 vmk80xx_reset_device drivers/comedi/drivers/vmk80xx.c:227 [inline] vmk80xx_auto_attach+0xa1c/0x1a40 drivers/comedi/drivers/vmk80xx.c:818 comedi_auto_config+0x238/0x380 drivers/comedi/drivers.c:1067 usb_probe_interface+0x5cd/0xb00 drivers/usb/core/driver.c:399 ... Similar issue also found by Syzkaller: Link: https://syzkaller.appspot.com/bug?extid=5205eb2f17de3e01946e Reported-and-tested-by: syzbot+5f29dc6a889fc42bd896@syzkaller.appspotmail.com Cc: stable Fixes: 49253d542cc0 ("staging: comedi: vmk80xx: factor out usb endpoint detection") Reviewed-by: Ian Abbott Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240408171633.31649-1-n.zhandarovich@fintech.ru Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/vmk80xx.c | 35 +++++++++++--------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c index 4536ed43f65b..84dce5184a77 100644 --- a/drivers/comedi/drivers/vmk80xx.c +++ b/drivers/comedi/drivers/vmk80xx.c @@ -641,32 +641,21 @@ static int vmk80xx_find_usb_endpoints(struct comedi_device *dev) struct vmk80xx_private *devpriv = dev->private; struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_host_interface *iface_desc = intf->cur_altsetting; - struct usb_endpoint_descriptor *ep_desc; - int i; + struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc; + int ret; - if (iface_desc->desc.bNumEndpoints != 2) + if (devpriv->model == VMK8061_MODEL) + ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc, + &ep_tx_desc, NULL, NULL); + else + ret = usb_find_common_endpoints(iface_desc, NULL, NULL, + &ep_rx_desc, &ep_tx_desc); + + if (ret) return -ENODEV; - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - ep_desc = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(ep_desc) || - usb_endpoint_is_bulk_in(ep_desc)) { - if (!devpriv->ep_rx) - devpriv->ep_rx = ep_desc; - continue; - } - - if (usb_endpoint_is_int_out(ep_desc) || - usb_endpoint_is_bulk_out(ep_desc)) { - if (!devpriv->ep_tx) - devpriv->ep_tx = ep_desc; - continue; - } - } - - if (!devpriv->ep_rx || !devpriv->ep_tx) - return -ENODEV; + devpriv->ep_rx = ep_rx_desc; + devpriv->ep_tx = ep_tx_desc; if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx)) return -EINVAL; From 2c9b943e9924cf1269e44289bc5e60e51b0f5270 Mon Sep 17 00:00:00 2001 From: Emil Kronborg Date: Wed, 20 Mar 2024 12:15:36 +0000 Subject: [PATCH 392/553] serial: mxs-auart: add spinlock around changing cts state commit 54c4ec5f8c471b7c1137a1f769648549c423c026 upstream. The uart_handle_cts_change() function in serial_core expects the caller to hold uport->lock. For example, I have seen the below kernel splat, when the Bluetooth driver is loaded on an i.MX28 board. [ 85.119255] ------------[ cut here ]------------ [ 85.124413] WARNING: CPU: 0 PID: 27 at /drivers/tty/serial/serial_core.c:3453 uart_handle_cts_change+0xb4/0xec [ 85.134694] Modules linked in: hci_uart bluetooth ecdh_generic ecc wlcore_sdio configfs [ 85.143314] CPU: 0 PID: 27 Comm: kworker/u3:0 Not tainted 6.6.3-00021-gd62a2f068f92 #1 [ 85.151396] Hardware name: Freescale MXS (Device Tree) [ 85.156679] Workqueue: hci0 hci_power_on [bluetooth] (...) [ 85.191765] uart_handle_cts_change from mxs_auart_irq_handle+0x380/0x3f4 [ 85.198787] mxs_auart_irq_handle from __handle_irq_event_percpu+0x88/0x210 (...) Cc: stable@vger.kernel.org Fixes: 4d90bb147ef6 ("serial: core: Document and assert lock requirements for irq helpers") Reviewed-by: Frank Li Signed-off-by: Emil Kronborg Link: https://lore.kernel.org/r/20240320121530.11348-1-emil.kronborg@protonmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index d21a4f3ef2fe..8b31017e7e56 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1094,11 +1094,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port, static irqreturn_t mxs_auart_irq_handle(int irq, void *context) { - u32 istat; + u32 istat, stat; struct mxs_auart_port *s = context; u32 mctrl_temp = s->mctrl_prev; - u32 stat = mxs_read(s, REG_STAT); + uart_port_lock(&s->port); + + stat = mxs_read(s, REG_STAT); istat = mxs_read(s, REG_INTR); /* ack irq */ @@ -1134,6 +1136,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context) istat &= ~AUART_INTR_TXIS; } + uart_port_unlock(&s->port); + return IRQ_HANDLED; } From bbaafbb4651fede8d3c3881601ecaa4f834f9d3f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 8 Apr 2024 19:23:43 +1000 Subject: [PATCH 393/553] serial/pmac_zilog: Remove flawed mitigation for rx irq flood commit 1be3226445362bfbf461c92a5bcdb1723f2e4907 upstream. The mitigation was intended to stop the irq completely. That may be better than a hard lock-up but it turns out that you get a crash anyway if you're using pmac_zilog as a serial console: ttyPZ0: pmz: rx irq flood ! BUG: spinlock recursion on CPU#0, swapper/0 That's because the pr_err() call in pmz_receive_chars() results in pmz_console_write() attempting to lock a spinlock already locked in pmz_interrupt(). With CONFIG_DEBUG_SPINLOCK=y, this produces a fatal BUG splat. The spinlock in question is the one in struct uart_port. Even when it's not fatal, the serial port rx function ceases to work. Also, the iteration limit doesn't play nicely with QEMU, as can be seen in the bug report linked below. A web search for other reports of the error message "pmz: rx irq flood" didn't produce anything. So I don't think this code is needed any more. Remove it. Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Aneesh Kumar K.V Cc: Naveen N. Rao Cc: Andy Shevchenko Cc: stable@kernel.org Cc: linux-m68k@lists.linux-m68k.org Link: https://github.com/vivier/qemu-m68k/issues/44 Link: https://lore.kernel.org/all/1078874617.9746.36.camel@gaston/ Acked-by: Michael Ellerman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/e853cf2c762f23101cd2ddec0cc0c2be0e72685f.1712568223.git.fthain@linux-m68k.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pmac_zilog.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index fe2e4ec423f7..daf15d23bb42 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -210,7 +210,6 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) { struct tty_port *port; unsigned char ch, r1, drop, flag; - int loops = 0; /* Sanity check, make sure the old bug is no longer happening */ if (uap->port.state == NULL) { @@ -291,24 +290,11 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap) if (r1 & Rx_OVR) tty_insert_flip_char(port, 0, TTY_OVERRUN); next_char: - /* We can get stuck in an infinite loop getting char 0 when the - * line is in a wrong HW state, we break that here. - * When that happens, I disable the receive side of the driver. - * Note that what I've been experiencing is a real irq loop where - * I'm getting flooded regardless of the actual port speed. - * Something strange is going on with the HW - */ - if ((++loops) > 1000) - goto flood; ch = read_zsreg(uap, R0); if (!(ch & Rx_CH_AV)) break; } - return true; - flood: - pmz_interrupt_control(uap, 0); - pmz_error("pmz: rx irq flood !\n"); return true; } From 87d15af82d24f5193319028e0be1cd9fc34e8f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 17 Apr 2024 11:03:27 +0200 Subject: [PATCH 394/553] serial: stm32: Return IRQ_NONE in the ISR if no handling happend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13c785323b36b845300b256d0e5963c3727667d7 upstream. If there is a stuck irq that the handler doesn't address, returning IRQ_HANDLED unconditionally makes it impossible for the irq core to detect the problem and disable the irq. So only return IRQ_HANDLED if an event was handled. A stuck irq is still problematic, but with this change at least it only makes the UART nonfunctional instead of occupying the (usually only) CPU by 100% and so stall the whole machine. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Cc: stable@vger.kernel.org Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/5f92603d0dfd8a5b8014b2b10a902d91e0bb881f.1713344161.git.u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 2a9c4058824a..ec2558ebb315 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -755,6 +755,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; u32 sr; unsigned int size; + irqreturn_t ret = IRQ_NONE; sr = readl_relaxed(port->membase + ofs->isr); @@ -763,11 +764,14 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) (sr & USART_SR_TC)) { stm32_usart_tc_interrupt_disable(port); stm32_usart_rs485_rts_disable(port); + ret = IRQ_HANDLED; } - if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) + if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) { writel_relaxed(USART_ICR_RTOCF, port->membase + ofs->icr); + ret = IRQ_HANDLED; + } if ((sr & USART_SR_WUF) && ofs->icr != UNDEF_REG) { /* Clear wake up flag and disable wake up interrupt */ @@ -776,6 +780,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_WUFIE); if (irqd_is_wakeup_set(irq_get_irq_data(port->irq))) pm_wakeup_event(tport->tty->dev, 0); + ret = IRQ_HANDLED; } /* @@ -790,6 +795,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_unlock_and_check_sysrq(port); if (size) tty_flip_buffer_push(tport); + ret = IRQ_HANDLED; } } @@ -797,6 +803,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) spin_lock(&port->lock); stm32_usart_transmit_chars(port); spin_unlock(&port->lock); + ret = IRQ_HANDLED; } /* Receiver timeout irq for DMA RX */ @@ -806,9 +813,10 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) uart_unlock_and_check_sysrq(port); if (size) tty_flip_buffer_push(tport); + ret = IRQ_HANDLED; } - return IRQ_HANDLED; + return ret; } static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl) From 282b223cfd5be01a28d08d90f724f0c3feab2797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 17 Apr 2024 11:03:28 +0200 Subject: [PATCH 395/553] serial: stm32: Reset .throttled state in .startup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ea2624b5b829b8f93c0dce25721d835969b34faf upstream. When an UART is opened that still has .throttled set from a previous open, the RX interrupt is enabled but the irq handler doesn't consider it. This easily results in a stuck irq with the effect to occupy the CPU in a tight loop. So reset the throttle state in .startup() to ensure that RX irqs are handled. Fixes: d1ec8a2eabe9 ("serial: stm32: update throttle and unthrottle ops for dma mode") Cc: stable@vger.kernel.org Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/a784f80d3414f7db723b2ec66efc56e1ad666cbf.1713344161.git.u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index ec2558ebb315..7d11511c8c12 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -1021,6 +1021,7 @@ static int stm32_usart_startup(struct uart_port *port) val |= USART_CR2_SWAP; writel_relaxed(val, port->membase + ofs->cr2); } + stm32_port->throttled = false; /* RX FIFO Flush */ if (ofs->rqr != UNDEF_REG) From 3c4ba8a6c5894a061f5eb851b5987898053608eb Mon Sep 17 00:00:00 2001 From: bolan wang Date: Wed, 6 Mar 2024 19:03:39 +0800 Subject: [PATCH 396/553] USB: serial: option: add Fibocom FM135-GL variants commit 356952b13af5b2c338df1e06889fd1b5e12cbbf4 upstream. Update the USB serial option driver support for the Fibocom FM135-GL LTE modules. - VID:PID 2cb7:0115, FM135-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x0115: mbim, diag, at, pipe Here are the outputs of usb-devices: T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0115 Rev=05.15 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom Module S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: bolan wang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1a3e5a9414f0..72b72bc4b417 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2272,6 +2272,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ From 3e34029b3c118e29c973d7869416b75529451a95 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Tue, 12 Mar 2024 14:29:12 +0800 Subject: [PATCH 397/553] USB: serial: option: add support for Fibocom FM650/FG650 commit fb1f4584b1215e8c209f6b3a4028ed8351a0e961 upstream. Fibocom FM650/FG650 are 5G modems with ECM/NCM/RNDIS/MBIM modes. This patch adds support to all 4 modes. In all 4 modes, the first serial port is the AT console while the other 3 appear to be diagnostic interfaces for dumping modem logs. usb-devices output for all modes: ECM: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a04 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms NCM: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a05 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms RNDIS: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 4 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a06 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms MBIM: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 7 Spd=5000 MxCh= 0 D: Ver= 3.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=0a07 Rev=04.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=FG650 Module S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=c0 MxPwr=504mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Chuanhong Guo Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 72b72bc4b417..f05d94a9f067 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2279,6 +2279,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ From 9eba0750255ac458016fe181e5c22ad8ee376fba Mon Sep 17 00:00:00 2001 From: Coia Prant Date: Mon, 15 Apr 2024 07:26:25 -0700 Subject: [PATCH 398/553] USB: serial: option: add Lonsung U8300/U9300 product commit cf16ffa17c398434a77b8a373e69287c95b60de2 upstream. Update the USB serial option driver to support Longsung U8300/U9300. For U8300 Interface 4 is used by for QMI interface in stock firmware of U8300, the router which uses U8300 modem. Interface 5 is used by for ADB interface in stock firmware of U8300, the router which uses U8300 modem. Interface mapping is: 0: unknown (Debug), 1: AT (Modem), 2: AT, 3: PPP (NDIS / Pipe), 4: QMI, 5: ADB T: Bus=05 Lev=01 Prnt=03 Port=02 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b05 Rev=03.18 S: Manufacturer=Android S: Product=Android C: #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms For U9300 Interface 1 is used by for ADB interface in stock firmware of U9300, the router which uses U9300 modem. Interface 4 is used by for QMI interface in stock firmware of U9300, the router which uses U9300 modem. Interface mapping is: 0: ADB, 1: AT (Modem), 2: AT, 3: PPP (NDIS / Pipe), 4: QMI Note: Interface 3 of some models of the U9300 series can send AT commands. T: Bus=05 Lev=01 Prnt=05 Port=04 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b3c Rev=03.18 S: Manufacturer=Android S: Product=Android C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms Tested successfully using Modem Manager on U9300. Tested successfully AT commands using If=1, If=2 and If=3 on U9300. Signed-off-by: Coia Prant Reviewed-by: Lars Melin [ johan: drop product defines, trim commit message ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f05d94a9f067..a8cc7e452f0e 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2052,6 +2052,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff), .driver_info = RSVD(4) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */ + .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, From 25a299c566bf40a1315ebc705edcc47ffa12bf1b Mon Sep 17 00:00:00 2001 From: Jerry Meng Date: Mon, 15 Apr 2024 15:04:29 +0800 Subject: [PATCH 399/553] USB: serial: option: support Quectel EM060K sub-models commit c840244aba7ad2b83ed904378b36bd6aef25511c upstream. EM060K_129, EM060K_12a, EM060K_12b and EM0060K_12c are EM060K's sub-models, having the same name "Quectel EM060K-GL" and the same interface layout. MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0129 Rev= 5.04 S: Manufacturer=Quectel S: Product=Quectel EM060K-GL S: SerialNumber=f6fa08b6 C:* #Ifs= 8 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8f(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Jerry Meng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a8cc7e452f0e..b3da92344848 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,10 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EM060K_128 0x0128 +#define QUECTEL_PRODUCT_EM060K_129 0x0129 +#define QUECTEL_PRODUCT_EM060K_12a 0x012a +#define QUECTEL_PRODUCT_EM060K_12b 0x012b +#define QUECTEL_PRODUCT_EM060K_12c 0x012c #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 @@ -1218,6 +1222,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, From 19f98f214bd6214a91f395541b29c3017c1a2515 Mon Sep 17 00:00:00 2001 From: Vanillan Wang Date: Tue, 16 Apr 2024 18:02:55 +0800 Subject: [PATCH 400/553] USB: serial: option: add Rolling RW101-GL and RW135-GL support commit 311f97a4c7c22a01f8897bddf00428dfd0668e79 upstream. Update the USB serial option driver support for the Rolling LTE modules. - VID:PID 33f8:01a2, RW101-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x01a2: mbim, diag, at, pipe - VID:PID 33f8:01a3, RW101-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x01a3: mbim, pipe - VID:PID 33f8:01a4, RW101-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x01a4: mbim, diag, at, pipe - VID:PID 33f8:0104, RW101-GL for laptop debug M.2 cards(with RMNET interface for /Linux/Chrome OS) 0x0104: RMNET, diag, at, pipe - VID:PID 33f8:0115, RW135-GL for laptop debug M.2 cards(with MBIM interface for /Linux/Chrome OS) 0x0115: MBIM, diag, at, pipe Here are the outputs of usb-devices: T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=01a2 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=01a3 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 17 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=01a4 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=33f8 ProdID=0104 Rev=05.04 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=ba2eb033 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms T: Bus=01 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=0115 Rev=05.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling Module S: SerialNumber=12345678 C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Vanillan Wang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b3da92344848..17be469837fc 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2307,6 +2307,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ + { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */ + .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, From 4ed7c7720aa0036c168add333e652c986de81b08 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 18 Apr 2024 13:34:30 +0200 Subject: [PATCH 401/553] USB: serial: option: add Telit FN920C04 rmnet compositions commit 582ee2f9d268d302595db3e36b985e5cbb93284d upstream. Add the following Telit FN920C04 compositions: 0x10a0: rmnet + tty (AT/NMEA) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a4: rmnet + tty (AT) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a4 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a9: rmnet + tty (AT) + tty (diag) + DPL (data packet logging) + adb T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a9 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniele Palmas Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 17be469837fc..b5ee8518fcc7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1376,6 +1376,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ + .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 8672ad663a22d0e4a325bb7d817b36ec412b967c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Apr 2024 16:33:28 +0200 Subject: [PATCH 402/553] Revert "usb: cdc-wdm: close race between read and workqueue" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1607830dadeefc407e4956336d9fcd9e9defd810 upstream. This reverts commit 339f83612f3a569b194680768b22bf113c26a29d. It has been found to cause problems in a number of Chromebook devices, so revert the change until it can be brought back in a safe way. Link: https://lore.kernel.org/r/385a3519-b45d-48c5-a6fd-a3fdb6bec92f@chromium.org Reported-by:: Aleksander Morgado Fixes: 339f83612f3a ("usb: cdc-wdm: close race between read and workqueue") Cc: stable Cc: Oliver Neukum Cc: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index fdc1a66b129a..1f0951be15ab 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -485,7 +485,6 @@ out_free_mem: static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; - int used; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) @@ -500,10 +499,7 @@ static int service_outstanding_interrupt(struct wdm_device *desc) goto out; } - used = test_and_set_bit(WDM_RESPONDING, &desc->flags); - if (used) - goto out; - + set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); From 9de10b59d16880a0a3ae2876c142fe54ce45d816 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 9 Apr 2024 12:27:54 +0000 Subject: [PATCH 403/553] usb: dwc2: host: Fix dereference issue in DDMA completion flow. commit eed04fa96c48790c1cce73c8a248e9d460b088f8 upstream. Fixed variable dereference issue in DDMA completion flow. Fixes: b258e4268850 ("usb: dwc2: host: Fix ISOC flow in DDMA mode") CC: stable@vger.kernel.org Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/2024040834-ethically-rumble-701f@gregkh/T/#m4c4b83bef0ebb4b67fe2e0a7d6466cbb6f416e39 Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/cc826d3ef53c934d8e6d98870f17f3cdc3d2755d.1712665387.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_ddma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c index 79582b102c7e..994a78ad084b 100644 --- a/drivers/usb/dwc2/hcd_ddma.c +++ b/drivers/usb/dwc2/hcd_ddma.c @@ -867,13 +867,15 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg, struct dwc2_dma_desc *dma_desc; struct dwc2_hcd_iso_packet_desc *frame_desc; u16 frame_desc_idx; - struct urb *usb_urb = qtd->urb->priv; + struct urb *usb_urb; u16 remain = 0; int rc = 0; if (!qtd->urb) return -EINVAL; + usb_urb = qtd->urb->priv; + dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx * sizeof(struct dwc2_dma_desc)), sizeof(struct dwc2_dma_desc), From a676b17edb52afe3b06eaa80fcb3f1dd2df7c460 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 5 Mar 2024 14:51:38 +0800 Subject: [PATCH 404/553] usb: Disable USB3 LPM at shutdown commit d920a2ed8620be04a3301e1a9c2b7cc1de65f19d upstream. SanDisks USB3 storage may disapper after system reboot: usb usb2-port3: link state change xhci_hcd 0000:00:14.0: clear port3 link state change, portsc: 0x2c0 usb usb2-port3: do warm reset, port only xhci_hcd 0000:00:14.0: xhci_hub_status_data: stopping usb2 port polling xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x2b0, return 0x2b0 usb usb2-port3: not warm reset yet, waiting 50ms xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x2f0, return 0x2f0 usb usb2-port3: not warm reset yet, waiting 200ms ... xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x6802c0, return 0x7002c0 usb usb2-port3: not warm reset yet, waiting 200ms xhci_hcd 0000:00:14.0: clear port3 reset change, portsc: 0x4802c0 xhci_hcd 0000:00:14.0: clear port3 warm(BH) reset change, portsc: 0x4002c0 xhci_hcd 0000:00:14.0: clear port3 link state change, portsc: 0x2c0 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x2c0, return 0x2c0 usb usb2-port3: not enabled, trying warm reset again... This is due to the USB device still cause port change event after xHCI is shuted down: xhci_hcd 0000:38:00.0: // Setting command ring address to 0xffffe001 xhci_hcd 0000:38:00.0: xhci_resume: starting usb3 port polling. xhci_hcd 0000:38:00.0: xhci_hub_status_data: stopping usb4 port polling xhci_hcd 0000:38:00.0: xhci_hub_status_data: stopping usb3 port polling xhci_hcd 0000:38:00.0: hcd_pci_runtime_resume: 0 xhci_hcd 0000:38:00.0: xhci_shutdown: stopping usb3 port polling. xhci_hcd 0000:38:00.0: // Halt the HC xhci_hcd 0000:38:00.0: xhci_shutdown completed - status = 1 xhci_hcd 0000:00:14.0: xhci_shutdown: stopping usb1 port polling. xhci_hcd 0000:00:14.0: // Halt the HC xhci_hcd 0000:00:14.0: xhci_shutdown completed - status = 1 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x1203, return 0x203 xhci_hcd 0000:00:14.0: set port reset, actual port 2-3 status = 0x1311 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x201203, return 0x100203 xhci_hcd 0000:00:14.0: clear port3 reset change, portsc: 0x1203 xhci_hcd 0000:00:14.0: clear port3 warm(BH) reset change, portsc: 0x1203 xhci_hcd 0000:00:14.0: clear port3 link state change, portsc: 0x1203 xhci_hcd 0000:00:14.0: clear port3 connect change, portsc: 0x1203 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x1203, return 0x203 usb 2-3: device not accepting address 2, error -108 xhci_hcd 0000:00:14.0: xHCI dying or halted, can't queue_command xhci_hcd 0000:00:14.0: Set port 2-3 link state, portsc: 0x1203, write 0x11261 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x1263, return 0x263 xhci_hcd 0000:00:14.0: set port reset, actual port 2-3 status = 0x1271 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x12b1, return 0x2b1 usb usb2-port3: not reset yet, waiting 60ms ACPI: PM: Preparing to enter system sleep state S5 xhci_hcd 0000:00:14.0: Get port status 2-3 read: 0x12f1, return 0x2f1 usb usb2-port3: not reset yet, waiting 200ms reboot: Restarting system The port change event is caused by LPM transition, so disabling LPM at shutdown to make sure the device is in U0 for warmboot. Signed-off-by: Kai-Heng Feng Cc: stable Link: https://lore.kernel.org/r/20240305065140.66801-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index e91fa567d08d..93a63b7f164d 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -409,8 +409,10 @@ static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); - if (port_dev->child) + if (port_dev->child) { usb_disable_usb2_hardware_lpm(port_dev->child); + usb_unlocked_disable_lpm(port_dev->child); + } } static const struct dev_pm_ops usb_port_pm_ops = { From 0588bbbd718a8130b98c54518f1e0b569ce60a93 Mon Sep 17 00:00:00 2001 From: Norihiko Hama Date: Wed, 27 Mar 2024 11:35:50 +0900 Subject: [PATCH 405/553] usb: gadget: f_ncm: Fix UAF ncm object at re-bind after usb ep transport error commit 6334b8e4553cc69f51e383c9de545082213d785e upstream. When ncm function is working and then stop usb0 interface for link down, eth_stop() is called. At this piont, accidentally if usb transport error should happen in usb_ep_enable(), 'in_ep' and/or 'out_ep' may not be enabled. After that, ncm_disable() is called to disable for ncm unbind but gether_disconnect() is never called since 'in_ep' is not enabled. As the result, ncm object is released in ncm unbind but 'dev->port_usb' associated to 'ncm->port' is not NULL. And when ncm bind again to recover netdev, ncm object is reallocated but usb0 interface is already associated to previous released ncm object. Therefore, once usb0 interface is up and eth_start_xmit() is called, released ncm object is dereferrenced and it might cause use-after-free memory. [function unlink via configfs] usb0: eth_stop dev->port_usb=ffffff9b179c3200 --> error happens in usb_ep_enable(). NCM: ncm_disable: ncm=ffffff9b179c3200 --> no gether_disconnect() since ncm->port.in_ep->enabled is false. NCM: ncm_unbind: ncm unbind ncm=ffffff9b179c3200 NCM: ncm_free: ncm free ncm=ffffff9b179c3200 <-- released ncm [function link via configfs] NCM: ncm_alloc: ncm alloc ncm=ffffff9ac4f8a000 NCM: ncm_bind: ncm bind ncm=ffffff9ac4f8a000 NCM: ncm_set_alt: ncm=ffffff9ac4f8a000 alt=0 usb0: eth_open dev->port_usb=ffffff9b179c3200 <-- previous released ncm usb0: eth_start dev->port_usb=ffffff9b179c3200 <-- eth_start_xmit() --> dev->wrap() Unable to handle kernel paging request at virtual address dead00000000014f This patch addresses the issue by checking if 'ncm->netdev' is not NULL at ncm_disable() to call gether_disconnect() to deassociate 'dev->port_usb'. It's more reasonable to check 'ncm->netdev' to call gether_connect/disconnect rather than check 'ncm->port.in_ep->enabled' since it might not be enabled but the gether connection might be established. Signed-off-by: Norihiko Hama Cc: stable Link: https://lore.kernel.org/r/20240327023550.51214-1-Norihiko.Hama@alpsalpine.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 14601a2d2542..b267ed9dc6d9 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -884,7 +884,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (alt > 1) goto fail; - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { DBG(cdev, "reset ncm\n"); ncm->netdev = NULL; gether_disconnect(&ncm->port); @@ -1369,7 +1369,7 @@ static void ncm_disable(struct usb_function *f) DBG(cdev, "ncm deactivated\n"); - if (ncm->port.in_ep->enabled) { + if (ncm->netdev) { ncm->netdev = NULL; gether_disconnect(&ncm->port); } From 7c6f941492b7f1900dbf81c53058d3f848c39861 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 12 Mar 2024 07:19:58 +0200 Subject: [PATCH 406/553] mei: me: disable RPL-S on SPS and IGN firmwares commit 0dc04112bee6fdd6eb847ccb32214703022c0269 upstream. Extend the quirk to disable MEI interface on Intel PCH Ignition (IGN) and SPS firmwares for RPL-S devices. These firmwares do not support the MEI protocol. Fixes: 3ed8c7d39cfe ("mei: me: add raptor lake point S DID") Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240312051958.118478-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f8219cbd2c7c..a617f64a351d 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -116,7 +116,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, From 89af25bd4b4bf6a71295f07e07a8ae7dc03c6595 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 23 Mar 2024 17:48:43 +0100 Subject: [PATCH 407/553] speakup: Avoid crash on very long word commit c8d2f34ea96ea3bce6ba2535f867f0d4ee3b22e1 upstream. In case a console is set up really large and contains a really long word (> 256 characters), we have to stop before the length of the word buffer. Signed-off-by: Samuel Thibault Fixes: c6e3fd22cd538 ("Staging: add speakup to the staging directory") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240323164843.1426997-1-samuel.thibault@ens-lyon.org Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index 73db0cb44fc7..45d906f17ea3 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -573,7 +573,7 @@ static u_long get_word(struct vc_data *vc) } attr_ch = get_char(vc, (u_short *)tmp_pos, &spk_attr); buf[cnt++] = attr_ch; - while (tmpx < vc->vc_cols - 1) { + while (tmpx < vc->vc_cols - 1 && cnt < sizeof(buf) - 1) { tmp_pos += 2; tmpx++; ch = get_char(vc, (u_short *)tmp_pos, &temp); From 5d43e072285e81b0b63cee7189b3357c7768a43b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Mar 2024 17:43:41 -0400 Subject: [PATCH 408/553] fs: sysfs: Fix reference leak in sysfs_break_active_protection() commit a90bca2228c0646fc29a72689d308e5fe03e6d78 upstream. The sysfs_break_active_protection() routine has an obvious reference leak in its error path. If the call to kernfs_find_and_get() fails then kn will be NULL, so the companion sysfs_unbreak_active_protection() routine won't get called (and would only cause an access violation by trying to dereference kn->parent if it was called). As a result, the reference to kobj acquired at the start of the function will never be released. Fix the leak by adding an explicit kobject_put() call when kn is NULL. Signed-off-by: Alan Stern Fixes: 2afc9166f79b ("scsi: sysfs: Introduce sysfs_{un,}break_active_protection()") Cc: Bart Van Assche Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Acked-by: Tejun Heo Link: https://lore.kernel.org/r/8a4d3f0f-c5e3-4b70-a188-0ca433f9e6f9@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index a12ac0356c69..f21e73d10724 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -450,6 +450,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); + else + kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); From e487b8eccf67c0703299e977d2a6d8a0ce0a8eae Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 5 Apr 2024 16:55:54 -0700 Subject: [PATCH 409/553] KVM: x86: Snapshot if a vCPU's vendor model is AMD vs. Intel compatible commit fd706c9b1674e2858766bfbf7430534c2b26fbef upstream. Add kvm_vcpu_arch.is_amd_compatible to cache if a vCPU's vendor model is compatible with AMD, i.e. if the vCPU vendor is AMD or Hygon, along with helpers to check if a vCPU is compatible AMD vs. Intel. To handle Intel vs. AMD behavior related to masking the LVTPC entry, KVM will need to check for vendor compatibility on every PMI injection, i.e. querying for AMD will soon be a moderately hot path. Note! This subtly (or maybe not-so-subtly) makes "Intel compatible" KVM's default behavior, both if userspace omits (or never sets) CPUID 0x0 and if userspace sets a completely unknown vendor. One could argue that KVM should treat such vCPUs as not being compatible with Intel *or* AMD, but that would add useless complexity to KVM. KVM needs to do *something* in the face of vendor specific behavior, and so unless KVM conjured up a magic third option, choosing to treat unknown vendors as neither Intel nor AMD means that checks on AMD compatibility would yield Intel behavior, and checks for Intel compatibility would yield AMD behavior. And that's far worse as it would effectively yield random behavior depending on whether KVM checked for AMD vs. Intel vs. !AMD vs. !Intel. And practically speaking, all x86 CPUs follow either Intel or AMD architecture, i.e. "supporting" an unknown third architecture adds no value. Deliberately don't convert any of the existing guest_cpuid_is_intel() checks, as the Intel side of things is messier due to some flows explicitly checking for exactly vendor==Intel, versus some flows assuming anything that isn't "AMD compatible" gets Intel behavior. The Intel code will be cleaned up in the future. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20240405235603.1173076-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 1 + arch/x86/kvm/cpuid.h | 10 ++++++++++ arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/x86.c | 2 +- 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dfcdcafe3a2c..887a171488ea 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -773,6 +773,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; u32 kvm_cpuid_base; + bool is_amd_compatible; u64 reserved_gpa_bits; int maxphyaddr; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 62a44455c51d..f02961cbbb75 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -340,6 +340,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b1658c0de847..18fd2e845989 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -125,6 +125,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d30325e297a0..13134954e24d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4649,7 +4649,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->cpu_role.base.level, is_efer_nx(context), guest_can_use_gbpages(vcpu), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f724765032bc..a2ea636a2308 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3278,7 +3278,7 @@ static bool is_mci_status_msr(u32 msr) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; From 0fb74c00d140a66128afc0003785dcc57e69d312 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Mar 2024 16:58:33 -0800 Subject: [PATCH 410/553] KVM: x86/pmu: Disable support for adaptive PEBS commit 9e985cbf2942a1bb8fcef9adc2a17d90fd7ca8ee upstream. Drop support for virtualizing adaptive PEBS, as KVM's implementation is architecturally broken without an obvious/easy path forward, and because exposing adaptive PEBS can leak host LBRs to the guest, i.e. can leak host kernel addresses to the guest. Bug #1 is that KVM doesn't account for the upper 32 bits of IA32_FIXED_CTR_CTRL when (re)programming fixed counters, e.g fixed_ctrl_field() drops the upper bits, reprogram_fixed_counters() stores local variables as u8s and truncates the upper bits too, etc. Bug #2 is that, because KVM _always_ sets precise_ip to a non-zero value for PEBS events, perf will _always_ generate an adaptive record, even if the guest requested a basic record. Note, KVM will also enable adaptive PEBS in individual *counter*, even if adaptive PEBS isn't exposed to the guest, but this is benign as MSR_PEBS_DATA_CFG is guaranteed to be zero, i.e. the guest will only ever see Basic records. Bug #3 is in perf. intel_pmu_disable_fixed() doesn't clear the upper bits either, i.e. leaves ICL_FIXED_0_ADAPTIVE set, and intel_pmu_enable_fixed() effectively doesn't clear ICL_FIXED_0_ADAPTIVE either. I.e. perf _always_ enables ADAPTIVE counters, regardless of what KVM requests. Bug #4 is that adaptive PEBS *might* effectively bypass event filters set by the host, as "Updated Memory Access Info Group" records information that might be disallowed by userspace via KVM_SET_PMU_EVENT_FILTER. Bug #5 is that KVM doesn't ensure LBR MSRs hold guest values (or at least zeros) when entering a vCPU with adaptive PEBS, which allows the guest to read host LBRs, i.e. host RIPs/addresses, by enabling "LBR Entries" records. Disable adaptive PEBS support as an immediate fix due to the severity of the LBR leak in particular, and because fixing all of the bugs will be non-trivial, e.g. not suitable for backporting to stable kernels. Note! This will break live migration, but trying to make KVM play nice with live migration would be quite complicated, wouldn't be guaranteed to work (i.e. KVM might still kill/confuse the guest), and it's not clear that there are any publicly available VMMs that support adaptive PEBS, let alone live migrate VMs that support adaptive PEBS, e.g. QEMU doesn't support PEBS in any capacity. Link: https://lore.kernel.org/all/20240306230153.786365-1-seanjc@google.com Link: https://lore.kernel.org/all/ZeepGjHCeSfadANM@google.com Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Cc: stable@vger.kernel.org Cc: Like Xu Cc: Mingwei Zhang Cc: Zhenyu Wang Cc: Zhang Xiong Cc: Lv Zhiyuan Cc: Dapeng Mi Cc: Jim Mattson Acked-by: Like Xu Link: https://lore.kernel.org/r/20240307005833.827147-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5c1590855ffc..10aff2c9a4e4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7742,8 +7742,28 @@ static u64 vmx_get_perf_capabilities(void) if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; From e09465aeccf1b1209ecbdb1acd914d944a4625b5 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 5 Apr 2024 16:55:55 -0700 Subject: [PATCH 411/553] KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms commit 49ff3b4aec51e3abfc9369997cc603319b02af9a upstream. On AMD and Hygon platforms, the local APIC does not automatically set the mask bit of the LVTPC register when handling a PMI and there is no need to clear it in the kernel's PMI handler. For guests, the mask bit is currently set by kvm_apic_local_deliver() and unless it is cleared by the guest kernel's PMI handler, PMIs stop arriving and break use-cases like sampling with perf record. This does not affect non-PerfMonV2 guests because PMIs are handled in the guest kernel by x86_pmu_handle_irq() which always clears the LVTPC mask bit irrespective of the vendor. Before: $ perf record -e cycles:u true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (1 samples) ] After: $ perf record -e cycles:u true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (19 samples) ] Fixes: a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI") Cc: stable@vger.kernel.org Signed-off-by: Sandipan Das Reviewed-by: Jim Mattson [sean: use is_intel_compatible instead of !is_amd_or_hygon()] Signed-off-by: Sean Christopherson Message-ID: <20240405235603.1173076-3-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bfeafe485552..c90fef0258c5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2548,7 +2548,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); return r; } From f7e71a7cf399f53ff9fc314ca3836dc913b05bd6 Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Wed, 17 Apr 2024 10:52:48 +0800 Subject: [PATCH 412/553] arm64: hibernate: Fix level3 translation fault in swsusp_save() commit 50449ca66cc5a8cbc64749cf4b9f3d3fc5f4b457 upstream. On arm64 machines, swsusp_save() faults if it attempts to access MEMBLOCK_NOMAP memory ranges. This can be reproduced in QEMU using UEFI when booting with rodata=off debug_pagealloc=off and CONFIG_KFENCE=n: Unable to handle kernel paging request at virtual address ffffff8000000000 Mem abort info: ESR = 0x0000000096000007 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x07: level 3 translation fault Data abort info: ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=00000000eeb0b000 [ffffff8000000000] pgd=180000217fff9803, p4d=180000217fff9803, pud=180000217fff9803, pmd=180000217fff8803, pte=0000000000000000 Internal error: Oops: 0000000096000007 [#1] SMP Internal error: Oops: 0000000096000007 [#1] SMP Modules linked in: xt_multiport ipt_REJECT nf_reject_ipv4 xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c iptable_filter bpfilter rfkill at803x snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg dwmac_generic stmmac_platform snd_hda_codec stmmac joydev pcs_xpcs snd_hda_core phylink ppdev lp parport ramoops reed_solomon ip_tables x_tables nls_iso8859_1 vfat multipath linear amdgpu amdxcp drm_exec gpu_sched drm_buddy hid_generic usbhid hid radeon video drm_suballoc_helper drm_ttm_helper ttm i2c_algo_bit drm_display_helper cec drm_kms_helper drm CPU: 0 PID: 3663 Comm: systemd-sleep Not tainted 6.6.2+ #76 Source Version: 4e22ed63a0a48e7a7cff9b98b7806d8d4add7dc0 Hardware name: Greatwall GW-XXXXXX-XXX/GW-XXXXXX-XXX, BIOS KunLun BIOS V4.0 01/19/2021 pstate: 600003c5 (nZCv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : swsusp_save+0x280/0x538 lr : swsusp_save+0x280/0x538 sp : ffffffa034a3fa40 x29: ffffffa034a3fa40 x28: ffffff8000001000 x27: 0000000000000000 x26: ffffff8001400000 x25: ffffffc08113e248 x24: 0000000000000000 x23: 0000000000080000 x22: ffffffc08113e280 x21: 00000000000c69f2 x20: ffffff8000000000 x19: ffffffc081ae2500 x18: 0000000000000000 x17: 6666662074736420 x16: 3030303030303030 x15: 3038666666666666 x14: 0000000000000b69 x13: ffffff9f89088530 x12: 00000000ffffffea x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffffffc08193f0d0 x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 0000000000000001 x5 : ffffffa0fff09dc8 x4 : 0000000000000000 x3 : 0000000000000027 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 000000000000004e Call trace: swsusp_save+0x280/0x538 swsusp_arch_suspend+0x148/0x190 hibernation_snapshot+0x240/0x39c hibernate+0xc4/0x378 state_store+0xf0/0x10c kobj_attr_store+0x14/0x24 The reason is swsusp_save() -> copy_data_pages() -> page_is_saveable() -> kernel_page_present() assuming that a page is always present when can_set_direct_map() is false (all of rodata_full, debug_pagealloc_enabled() and arm64_kfence_can_set_direct_map() false), irrespective of the MEMBLOCK_NOMAP ranges. Such MEMBLOCK_NOMAP regions should not be saved during hibernation. This problem was introduced by changes to the pfn_valid() logic in commit a7d9f306ba70 ("arm64: drop pfn_valid_within() and simplify pfn_valid()"). Similar to other architectures, drop the !can_set_direct_map() check in kernel_page_present() so that page_is_savable() skips such pages. Fixes: a7d9f306ba70 ("arm64: drop pfn_valid_within() and simplify pfn_valid()") Cc: # 5.14.x Suggested-by: Mike Rapoport Suggested-by: Catalin Marinas Co-developed-by: xiongxin Signed-off-by: xiongxin Signed-off-by: Yaxiong Tian Acked-by: Mike Rapoport (IBM) Link: https://lore.kernel.org/r/20240417025248.386622-1-tianyaxiong@kylinos.cn [catalin.marinas@arm.com: rework commit message] Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/pageattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 826cb200b204..425b398f8d45 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -220,9 +220,6 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - if (!can_set_direct_map()) - return true; - pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; From 76c2f4d426a5358fced5d5990744d46f10a4ccea Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Fri, 12 Apr 2024 16:17:32 +0800 Subject: [PATCH 413/553] init/main.c: Fix potential static_command_line memory overflow commit 46dad3c1e57897ab9228332f03e1c14798d2d3b9 upstream. We allocate memory of size 'xlen + strlen(boot_command_line) + 1' for static_command_line, but the strings copied into static_command_line are extra_command_line and command_line, rather than extra_command_line and boot_command_line. When strlen(command_line) > strlen(boot_command_line), static_command_line will overflow. This patch just recovers strlen(command_line) which was miss-consolidated with strlen(boot_command_line) in the commit f5c7310ac73e ("init/main: add checks for the return value of memblock_alloc*()") Link: https://lore.kernel.org/all/20240412081733.35925-2-ytcoode@gmail.com/ Fixes: f5c7310ac73e ("init/main: add checks for the return value of memblock_alloc*()") Cc: stable@vger.kernel.org Signed-off-by: Yuntao Wang Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- init/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/init/main.c b/init/main.c index ccde19e7275f..2c339793511b 100644 --- a/init/main.c +++ b/init/main.c @@ -633,6 +633,8 @@ static void __init setup_command_line(char *command_line) if (!saved_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen); + len = xlen + strlen(command_line) + 1; + static_command_line = memblock_alloc(len, SMP_CACHE_BYTES); if (!static_command_line) panic("%s: Failed to allocate %zu bytes\n", __func__, len); From 5ef7ba2799a3b5ed292b8f6407376e2c25ef002e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sun, 7 Apr 2024 16:54:56 +0800 Subject: [PATCH 414/553] mm/memory-failure: fix deadlock when hugetlb_optimize_vmemmap is enabled commit 1983184c22dd84a4d95a71e5c6775c2638557dc7 upstream. When I did hard offline test with hugetlb pages, below deadlock occurs: ====================================================== WARNING: possible circular locking dependency detected 6.8.0-11409-gf6cef5f8c37f #1 Not tainted ------------------------------------------------------ bash/46904 is trying to acquire lock: ffffffffabe68910 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_slow_dec+0x16/0x60 but task is already holding lock: ffffffffabf92ea8 (pcp_batch_high_lock){+.+.}-{3:3}, at: zone_pcp_disable+0x16/0x40 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (pcp_batch_high_lock){+.+.}-{3:3}: __mutex_lock+0x6c/0x770 page_alloc_cpu_online+0x3c/0x70 cpuhp_invoke_callback+0x397/0x5f0 __cpuhp_invoke_callback_range+0x71/0xe0 _cpu_up+0xeb/0x210 cpu_up+0x91/0xe0 cpuhp_bringup_mask+0x49/0xb0 bringup_nonboot_cpus+0xb7/0xe0 smp_init+0x25/0xa0 kernel_init_freeable+0x15f/0x3e0 kernel_init+0x15/0x1b0 ret_from_fork+0x2f/0x50 ret_from_fork_asm+0x1a/0x30 -> #0 (cpu_hotplug_lock){++++}-{0:0}: __lock_acquire+0x1298/0x1cd0 lock_acquire+0xc0/0x2b0 cpus_read_lock+0x2a/0xc0 static_key_slow_dec+0x16/0x60 __hugetlb_vmemmap_restore_folio+0x1b9/0x200 dissolve_free_huge_page+0x211/0x260 __page_handle_poison+0x45/0xc0 memory_failure+0x65e/0xc70 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x387/0x550 ksys_write+0x64/0xe0 do_syscall_64+0xca/0x1e0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pcp_batch_high_lock); lock(cpu_hotplug_lock); lock(pcp_batch_high_lock); rlock(cpu_hotplug_lock); *** DEADLOCK *** 5 locks held by bash/46904: #0: ffff98f6c3bb23f0 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x64/0xe0 #1: ffff98f6c328e488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf8/0x1d0 #2: ffff98ef83b31890 (kn->active#113){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1d0 #3: ffffffffabf9db48 (mf_mutex){+.+.}-{3:3}, at: memory_failure+0x44/0xc70 #4: ffffffffabf92ea8 (pcp_batch_high_lock){+.+.}-{3:3}, at: zone_pcp_disable+0x16/0x40 stack backtrace: CPU: 10 PID: 46904 Comm: bash Kdump: loaded Not tainted 6.8.0-11409-gf6cef5f8c37f #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x68/0xa0 check_noncircular+0x129/0x140 __lock_acquire+0x1298/0x1cd0 lock_acquire+0xc0/0x2b0 cpus_read_lock+0x2a/0xc0 static_key_slow_dec+0x16/0x60 __hugetlb_vmemmap_restore_folio+0x1b9/0x200 dissolve_free_huge_page+0x211/0x260 __page_handle_poison+0x45/0xc0 memory_failure+0x65e/0xc70 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x387/0x550 ksys_write+0x64/0xe0 do_syscall_64+0xca/0x1e0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7fc862314887 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007fff19311268 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc862314887 RDX: 000000000000000c RSI: 000056405645fe10 RDI: 0000000000000001 RBP: 000056405645fe10 R08: 00007fc8623d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007fc86241b780 R14: 00007fc862417600 R15: 00007fc862416a00 In short, below scene breaks the lock dependency chain: memory_failure __page_handle_poison zone_pcp_disable -- lock(pcp_batch_high_lock) dissolve_free_huge_page __hugetlb_vmemmap_restore_folio static_key_slow_dec cpus_read_lock -- rlock(cpu_hotplug_lock) Fix this by calling drain_all_pages() instead. This issue won't occur until commit a6b40850c442 ("mm: hugetlb: replace hugetlb_free_vmemmap_enabled with a static_key"). As it introduced rlock(cpu_hotplug_lock) in dissolve_free_huge_page() code path while lock(pcp_batch_high_lock) is already in the __page_handle_poison(). [linmiaohe@huawei.com: extend comment per Oscar] [akpm@linux-foundation.org: reflow block comment] Link: https://lkml.kernel.org/r/20240407085456.2798193-1-linmiaohe@huawei.com Fixes: a6b40850c442 ("mm: hugetlb: replace hugetlb_free_vmemmap_enabled with a static_key") Signed-off-by: Miaohe Lin Acked-by: Oscar Salvador Reviewed-by: Jane Chu Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5b846ed5dcbe..be58ce999259 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -84,11 +84,23 @@ static int __page_handle_poison(struct page *page) { int ret; - zone_pcp_disable(page_zone(page)); + /* + * zone_pcp_disable() can't be used here. It will + * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold + * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap + * optimization is enabled. This will break current lock dependency + * chain and leads to deadlock. + * Disabling pcp before dissolving the page was a deterministic + * approach because we made sure that those pages cannot end up in any + * PCP list. Draining PCP lists expels those pages to the buddy system, + * but nothing guarantees that those pages do not get back to a PCP + * queue if we need to refill those. + */ ret = dissolve_free_huge_page(page); - if (!ret) + if (!ret) { + drain_all_pages(page_zone(page)); ret = take_page_off_buddy(page); - zone_pcp_enable(page_zone(page)); + } return ret; } From 212e3baccdb1939606420d88f7f52d346b49a284 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 11 Apr 2024 11:11:38 +0800 Subject: [PATCH 415/553] drm/amdgpu: validate the parameters of bo mapping operations more clearly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6fef2d4c00b5b8561ad68dd2b68173f5c6af1e75 upstream. Verify the parameters of amdgpu_vm_bo_(map/replace_map/clearing_mappings) in one common place. Fixes: dc54d3d1744d ("drm/amdgpu: implement AMDGPU_VA_OP_CLEAR v2") Cc: stable@vger.kernel.org Reported-by: Vlad Stolyarov Suggested-by: Christian König Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 ++++++++++++++++---------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4c661e024e13..49a47807c42d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1400,6 +1400,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1426,21 +1457,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1493,17 +1517,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1517,7 +1533,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1604,10 +1620,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); From 8f79b42d1c213189a99c5de3e58081c205aaf47c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:11 -0400 Subject: [PATCH 416/553] drm/vmwgfx: Sort primary plane formats by order of preference commit d4c972bff3129a9dd4c22a3999fd8eba1a81531a upstream. The table of primary plane formats wasn't sorted at all, leading to applications picking our least desirable formats by defaults. Sort the primary plane formats according to our order of preference. Nice side-effect of this change is that it makes IGT's kms_atomic plane-invalid-params pass because the test picks the first format which for vmwgfx was DRM_FORMAT_XRGB1555 and uses fb's with odd sizes which make Pixman, which IGT depends on assert due to the fact that our 16bpp formats aren't 32 bit aligned like Pixman requires all formats to be. Signed-off-by: Zack Rusin Fixes: 36cc79bc9077 ("drm/vmwgfx: Add universal plane support") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-6-zack.rusin@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index b02d2793659f..b116600b343a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -246,10 +246,10 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { - DRM_FORMAT_XRGB1555, - DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, }; static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = { From bcff1ed2ff1933be34b3737e5534c6650cd4e6e1 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:10 -0400 Subject: [PATCH 417/553] drm/vmwgfx: Fix crtc's atomic check conditional commit a60ccade88f926e871a57176e86a34bbf0db0098 upstream. The conditional was supposed to prevent enabling of a crtc state without a set primary plane. Accidently it also prevented disabling crtc state with a set primary plane. Neither is correct. Fix the conditional and just driver-warn when a crtc state has been enabled without a primary plane which will help debug broken userspace. Fixes IGT's kms_atomic_interruptible and kms_atomic_transition tests. Signed-off-by: Zack Rusin Fixes: 06ec41909e31 ("drm/vmwgfx: Add and connect CRTC helper functions") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Reviewed-by: Ian Forbes Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-5-zack.rusin@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index aa571b75cd07..b1aed051b41a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -793,6 +793,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct vmw_private *vmw = vmw_priv(crtc->dev); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc); @@ -800,9 +801,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, bool has_primary = new_state->plane_mask & drm_plane_mask(crtc->primary); - /* We always want to have an active plane with an active CRTC */ - if (has_primary != new_state->enable) - return -EINVAL; + /* + * This is fine in general, but broken userspace might expect + * some actual rendering so give a clue as why it's blank. + */ + if (new_state->enable && !has_primary) + drm_dbg_driver(&vmw->drm, + "CRTC without a primary plane will be blank.\n"); if (new_state->connector_mask != connector_mask && From ad74d208f213c06d860916ad40f609ade8c13039 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 11 Apr 2024 11:15:09 +1000 Subject: [PATCH 418/553] nouveau: fix instmem race condition around ptr stores commit fff1386cc889d8fb4089d285f883f8cba62d82ce upstream. Running a lot of VK CTS in parallel against nouveau, once every few hours you might see something like this crash. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 8000000114e6e067 P4D 8000000114e6e067 PUD 109046067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 53891 Comm: deqp-vk Not tainted 6.8.0-rc6+ #27 Hardware name: Gigabyte Technology Co., Ltd. Z390 I AORUS PRO WIFI/Z390 I AORUS PRO WIFI-CF, BIOS F8 11/05/2021 RIP: 0010:gp100_vmm_pgt_mem+0xe3/0x180 [nouveau] Code: c7 48 01 c8 49 89 45 58 85 d2 0f 84 95 00 00 00 41 0f b7 46 12 49 8b 7e 08 89 da 42 8d 2c f8 48 8b 47 08 41 83 c7 01 48 89 ee <48> 8b 40 08 ff d0 0f 1f 00 49 8b 7e 08 48 89 d9 48 8d 75 04 48 c1 RSP: 0000:ffffac20c5857838 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000004d8001 RCX: 0000000000000001 RDX: 00000000004d8001 RSI: 00000000000006d8 RDI: ffffa07afe332180 RBP: 00000000000006d8 R08: ffffac20c5857ad0 R09: 0000000000ffff10 R10: 0000000000000001 R11: ffffa07af27e2de0 R12: 000000000000001c R13: ffffac20c5857ad0 R14: ffffa07a96fe9040 R15: 000000000000001c FS: 00007fe395eed7c0(0000) GS:ffffa07e2c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000011febe001 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ... ? gp100_vmm_pgt_mem+0xe3/0x180 [nouveau] ? gp100_vmm_pgt_mem+0x37/0x180 [nouveau] nvkm_vmm_iter+0x351/0xa20 [nouveau] ? __pfx_nvkm_vmm_ref_ptes+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] ? __lock_acquire+0x3ed/0x2170 ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] nvkm_vmm_ptes_get_map+0xc2/0x100 [nouveau] ? __pfx_nvkm_vmm_ref_ptes+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] nvkm_vmm_map_locked+0x224/0x3a0 [nouveau] Adding any sort of useful debug usually makes it go away, so I hand wrote the function in a line, and debugged the asm. Every so often pt->memory->ptrs is NULL. This ptrs ptr is set in the nv50_instobj_acquire called from nvkm_kmap. If Thread A and Thread B both get to nv50_instobj_acquire around the same time, and Thread A hits the refcount_set line, and in lockstep thread B succeeds at refcount_inc_not_zero, there is a chance the ptrs value won't have been stored since refcount_set is unordered. Force a memory barrier here, I picked smp_mb, since we want it on all CPUs and it's write followed by a read. v2: use paired smp_rmb/smp_wmb. Cc: Fixes: be55287aa5ba ("drm/nouveau/imem/nv50: embed nvkm_instobj directly into nv04_instobj") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240411011510.2546857-1-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index c51bac76174c..9fe5b6a36ab9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -221,8 +221,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory) void __iomem *map = NULL; /* Already mapped? */ - if (refcount_inc_not_zero(&iobj->maps)) + if (refcount_inc_not_zero(&iobj->maps)) { + /* read barrier match the wmb on refcount set */ + smp_rmb(); return iobj->map; + } /* Take the lock, and re-check that another thread hasn't * already mapped the object in the meantime. @@ -249,6 +252,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; + /* barrier to ensure the ptrs are written before refcount is set */ + smp_wmb(); refcount_set(&iobj->maps, 1); } From 1e7feb31a18c197d63a5e606025ed63c762f8918 Mon Sep 17 00:00:00 2001 From: Qiang Zhang Date: Sun, 14 Apr 2024 19:49:45 +0800 Subject: [PATCH 419/553] bootconfig: use memblock_free_late to free xbc memory to buddy commit 89f9a1e876b5a7ad884918c03a46831af202c8a0 upstream. On the time to free xbc memory in xbc_exit(), memblock may has handed over memory to buddy allocator. So it doesn't make sense to free memory back to memblock. memblock_free() called by xbc_exit() even causes UAF bugs on architectures with CONFIG_ARCH_KEEP_MEMBLOCK disabled like x86. Following KASAN logs shows this case. This patch fixes the xbc memory free problem by calling memblock_free() in early xbc init error rewind path and calling memblock_free_late() in xbc exit path to free memory to buddy allocator. [ 9.410890] ================================================================== [ 9.418962] BUG: KASAN: use-after-free in memblock_isolate_range+0x12d/0x260 [ 9.426850] Read of size 8 at addr ffff88845dd30000 by task swapper/0/1 [ 9.435901] CPU: 9 PID: 1 Comm: swapper/0 Tainted: G U 6.9.0-rc3-00208-g586b5dfb51b9 #5 [ 9.446403] Hardware name: Intel Corporation RPLP LP5 (CPU:RaptorLake)/RPLP LP5 (ID:13), BIOS IRPPN02.01.01.00.00.19.015.D-00000000 Dec 28 2023 [ 9.460789] Call Trace: [ 9.463518] [ 9.465859] dump_stack_lvl+0x53/0x70 [ 9.469949] print_report+0xce/0x610 [ 9.473944] ? __virt_addr_valid+0xf5/0x1b0 [ 9.478619] ? memblock_isolate_range+0x12d/0x260 [ 9.483877] kasan_report+0xc6/0x100 [ 9.487870] ? memblock_isolate_range+0x12d/0x260 [ 9.493125] memblock_isolate_range+0x12d/0x260 [ 9.498187] memblock_phys_free+0xb4/0x160 [ 9.502762] ? __pfx_memblock_phys_free+0x10/0x10 [ 9.508021] ? mutex_unlock+0x7e/0xd0 [ 9.512111] ? __pfx_mutex_unlock+0x10/0x10 [ 9.516786] ? kernel_init_freeable+0x2d4/0x430 [ 9.521850] ? __pfx_kernel_init+0x10/0x10 [ 9.526426] xbc_exit+0x17/0x70 [ 9.529935] kernel_init+0x38/0x1e0 [ 9.533829] ? _raw_spin_unlock_irq+0xd/0x30 [ 9.538601] ret_from_fork+0x2c/0x50 [ 9.542596] ? __pfx_kernel_init+0x10/0x10 [ 9.547170] ret_from_fork_asm+0x1a/0x30 [ 9.551552] [ 9.555649] The buggy address belongs to the physical page: [ 9.561875] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x45dd30 [ 9.570821] flags: 0x200000000000000(node=0|zone=2) [ 9.576271] page_type: 0xffffffff() [ 9.580167] raw: 0200000000000000 ffffea0011774c48 ffffea0012ba1848 0000000000000000 [ 9.588823] raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 [ 9.597476] page dumped because: kasan: bad access detected [ 9.605362] Memory state around the buggy address: [ 9.610714] ffff88845dd2ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 9.618786] ffff88845dd2ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 9.626857] >ffff88845dd30000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 9.634930] ^ [ 9.638534] ffff88845dd30080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 9.646605] ffff88845dd30100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 9.654675] ================================================================== Link: https://lore.kernel.org/all/20240414114944.1012359-1-qiang4.zhang@linux.intel.com/ Fixes: 40caa127f3c7 ("init: bootconfig: Remove all bootconfig data when the init memory is removed") Cc: Stable@vger.kernel.org Signed-off-by: Qiang Zhang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/bootconfig.h | 7 ++++++- lib/bootconfig.c | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index ca73940e26df..4195444ec45d 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -287,7 +287,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_exit(void); +void __init _xbc_exit(bool early); + +static inline void xbc_exit(void) +{ + _xbc_exit(false); +} /* XBC embedded bootconfig data in kernel */ #ifdef CONFIG_BOOT_CONFIG_EMBED diff --git a/lib/bootconfig.c b/lib/bootconfig.c index c59d26068a64..8841554432d5 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -61,9 +61,12 @@ static inline void * __init xbc_alloc_mem(size_t size) return memblock_alloc(size, SMP_CACHE_BYTES); } -static inline void __init xbc_free_mem(void *addr, size_t size) +static inline void __init xbc_free_mem(void *addr, size_t size, bool early) { - memblock_free(addr, size); + if (early) + memblock_free(addr, size); + else if (addr) + memblock_free_late(__pa(addr), size); } #else /* !__KERNEL__ */ @@ -73,7 +76,7 @@ static inline void *xbc_alloc_mem(size_t size) return malloc(size); } -static inline void xbc_free_mem(void *addr, size_t size) +static inline void xbc_free_mem(void *addr, size_t size, bool early) { free(addr); } @@ -904,13 +907,13 @@ static int __init xbc_parse_tree(void) * If you need to reuse xbc_init() with new boot config, you can * use this. */ -void __init xbc_exit(void) +void __init _xbc_exit(bool early) { - xbc_free_mem(xbc_data, xbc_data_size); + xbc_free_mem(xbc_data, xbc_data_size, early); xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early); xbc_nodes = NULL; brace_index = 0; } @@ -963,7 +966,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) if (!xbc_nodes) { if (emsg) *emsg = "Failed to allocate bootconfig nodes"; - xbc_exit(); + _xbc_exit(true); return -ENOMEM; } memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); @@ -977,7 +980,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) *epos = xbc_err_pos; if (emsg) *emsg = xbc_err_msg; - xbc_exit(); + _xbc_exit(true); } else ret = xbc_node_num; From 897ac5306bbeb83e90c437326f7044c79a17c611 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 16 Apr 2024 03:20:48 +0900 Subject: [PATCH 420/553] nilfs2: fix OOB in nilfs_set_de_type commit c4a7dc9523b59b3e73fd522c73e95e072f876b16 upstream. The size of the nilfs_type_by_mode array in the fs/nilfs2/dir.c file is defined as "S_IFMT >> S_SHIFT", but the nilfs_set_de_type() function, which uses this array, specifies the index to read from the array in the same way as "(mode & S_IFMT) >> S_SHIFT". static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; // oob } However, when the index is determined this way, an out-of-bounds (OOB) error occurs by referring to an index that is 1 larger than the array size when the condition "mode & S_IFMT == S_IFMT" is satisfied. Therefore, a patch to resize the nilfs_type_by_mode array should be applied to prevent OOB errors. Link: https://lkml.kernel.org/r/20240415182048.7144-1-konishi.ryusuke@gmail.com Reported-by: syzbot+2e22057de05b9f3b30d8@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2e22057de05b9f3b30d8 Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Jeongjun Park Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index decd6471300b..760405da852f 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -243,7 +243,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = { #define S_SHIFT 12 static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { +nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, From 013c787d231188a6408e2991150d3c9bf9a2aa0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:44 +0300 Subject: [PATCH 421/553] net: dsa: mt7530: set all CPU ports in MT7531_CPU_PMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ff221029a51fd54cacac66e193e0c75e4de940e7 upstream. MT7531_CPU_PMAP represents the destination port mask for trapped-to-CPU frames (further restricted by PCR_MATRIX). Currently the driver sets the first CPU port as the single port in this bit mask, which works fine regardless of whether the device tree defines port 5, 6 or 5+6 as CPU ports. This is because the logic coincides with DSA's logic of picking the first CPU port as the CPU port that all user ports are affine to, by default. An upcoming change would like to influence DSA's selection of the default CPU port to no longer be the first one, and in that case, this logic needs adaptation. Since there is no observed leakage or duplication of frames if all CPU ports are defined in this bit mask, simply include them all. Suggested-by: Russell King (Oracle) Suggested-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Arınç ÜNAL Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 15 +++++++-------- drivers/net/dsa/mt7530.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b5f61a9a378e..b5c36d220f28 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1236,6 +1236,13 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) if (priv->id == ID_MT7530 || priv->id == ID_MT7621) mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port)); + /* Add the CPU port to the CPU port bitmap for MT7531. Trapped frames + * will be forwarded to the CPU port that is affine to the inbound user + * port. + */ + if (priv->id == ID_MT7531) + mt7530_set(priv, MT7531_CFC, MT7531_CPU_PMAP(BIT(port))); + /* CPU port gets connected to all user ports of * the switch. */ @@ -2534,16 +2541,8 @@ static int mt7531_setup_common(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - struct dsa_port *cpu_dp; int ret, i; - /* BPDU to CPU port */ - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, - BIT(cpu_dp->index)); - break; - } - mt753x_trap_frames(priv); /* Enable and reset MIB counters */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index af18f47f2214..206966e46e81 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -57,6 +57,7 @@ enum mt753x_id { #define MT7531_MIRROR_PORT_GET(x) (((x) >> 16) & MIRROR_MASK) #define MT7531_MIRROR_PORT_SET(x) (((x) & MIRROR_MASK) << 16) #define MT7531_CPU_PMAP_MASK GENMASK(7, 0) +#define MT7531_CPU_PMAP(x) FIELD_PREP(MT7531_CPU_PMAP_MASK, x) #define MT753X_MIRROR_REG(id) (((id) == ID_MT7531) ? \ MT7531_CFC : MT7530_MFC) From d9c2f69cc12ce8d37e2f94a033e0c2c0bdedf558 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 17 Jun 2023 09:26:48 +0300 Subject: [PATCH 422/553] net: dsa: introduce preferred_default_local_cpu_port and use on MT7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b79d7c14f48083abb3fb061370c0c64a569edf4c upstream. Since the introduction of the OF bindings, DSA has always had a policy that in case multiple CPU ports are present in the device tree, the numerically smallest one is always chosen. The MT7530 switch family, except the switch on the MT7988 SoC, has 2 CPU ports, 5 and 6, where port 6 is preferable on the MT7531BE switch because it has higher bandwidth. The MT7530 driver developers had 3 options: - to modify DSA when the MT7531 switch support was introduced, such as to prefer the better port - to declare both CPU ports in device trees as CPU ports, and live with the sub-optimal performance resulting from not preferring the better port - to declare just port 6 in the device tree as a CPU port Of course they chose the path of least resistance (3rd option), kicking the can down the road. The hardware description in the device tree is supposed to be stable - developers are not supposed to adopt the strategy of piecemeal hardware description, where the device tree is updated in lockstep with the features that the kernel currently supports. Now, as a result of the fact that they did that, any attempts to modify the device tree and describe both CPU ports as CPU ports would make DSA change its default selection from port 6 to 5, effectively resulting in a performance degradation visible to users with the MT7531BE switch as can be seen below. Without preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 374 MBytes 157 Mbits/sec 734 sender [ 5][TX-C] 0.00-20.00 sec 373 MBytes 156 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 778 Mbits/sec 0 sender [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 777 Mbits/sec receiver With preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 856 Mbits/sec 273 sender [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 855 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.72 GBytes 737 Mbits/sec 15 sender [ 7][RX-C] 0.00-20.00 sec 1.71 GBytes 736 Mbits/sec receiver Using one port for WAN and the other ports for LAN is a very popular use case which is what this test emulates. As such, this change proposes that we retroactively modify stable kernels (which don't support the modification of the CPU port assignments, so as to let user space fix the problem and restore the throughput) to keep the mt7530 driver preferring port 6 even with device trees where the hardware is more fully described. Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Signed-off-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Arınç ÜNAL Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 15 +++++++++++++++ include/net/dsa.h | 8 ++++++++ net/dsa/dsa2.c | 24 +++++++++++++++++++++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b5c36d220f28..05f5f0aa8b11 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -419,6 +419,20 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } +/* If port 6 is available as a CPU port, always prefer that as the default, + * otherwise don't care. + */ +static struct dsa_port * +mt753x_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp = dsa_to_port(ds, 6); + + if (dsa_port_is_cpu(cpu_dp)) + return cpu_dp; + + return NULL; +} + /* Setup port 6 interface mode and TRGMII TX circuit */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) @@ -3405,6 +3419,7 @@ static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, .get_sset_count = mt7530_get_sset_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index ee369670e20e..f96b61d9768e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -968,6 +968,14 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + /* + * Compatibility between device trees defining multiple CPU ports and + * drivers which are not OK to use by default the numerically smallest + * CPU port of a switch for its local ports. This can return NULL, + * meaning "don't know/don't care". + */ + struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds); + /* * Port's MAC EEE settings */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5417f7b1187c..98f864879175 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -425,6 +425,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -432,12 +450,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + /* Prefer a local CPU port */ dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ From 41a004ffba9b1fd8a5a7128ebd0dfa3ed39c3316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Wed, 20 Mar 2024 23:45:30 +0300 Subject: [PATCH 423/553] net: dsa: mt7530: fix improper frames on all 25MHz and 40MHz XTAL MT7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5f563c31ff0c40ce395d0bae7daa94c7950dac97 upstream. The MT7530 switch after reset initialises with a core clock frequency that works with a 25MHz XTAL connected to it. For 40MHz XTAL, the core clock frequency must be set to 500MHz. The mt7530_pll_setup() function is responsible of setting the core clock frequency. Currently, it runs on MT7530 with 25MHz and 40MHz XTAL. This causes MT7530 switch with 25MHz XTAL to egress and ingress frames improperly. Introduce a check to run it only on MT7530 with 40MHz XTAL. The core clock frequency is set by writing to a switch PHY's register. Access to the PHY's register is done via the MDIO bus the switch is also on. Therefore, it works only when the switch makes switch PHYs listen on the MDIO bus the switch is on. This is controlled either by the state of the ESW_P1_LED_1 pin after reset deassertion or modifying bit 5 of the modifiable trap register. When ESW_P1_LED_1 is pulled high, PHY indirect access is used. That means accessing PHY registers via the PHY indirect access control register of the switch. When ESW_P1_LED_1 is pulled low, PHY direct access is used. That means accessing PHY registers via the MDIO bus the switch is on. For MT7530 switch with 40MHz XTAL on a board with ESW_P1_LED_1 pulled high, the core clock frequency won't be set to 500MHz, causing the switch to egress and ingress frames improperly. Run mt7530_pll_setup() after PHY direct access is set on the modifiable trap register. With these two changes, all MT7530 switches with 25MHz and 40MHz, and P1_LED_1 pulled high or low, will egress and ingress frames properly. Link: https://github.com/BPI-SINOVOIP/BPI-R2-bsp/blob/4a5dd143f2172ec97a2872fa29c7c4cd520f45b5/linux-mt/drivers/net/ethernet/mediatek/gsw_mt7623.c#L1039 Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20240320-for-net-mt7530-fix-25mhz-xtal-with-direct-phy-access-v1-1-d92f605f1160@arinc9.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 05f5f0aa8b11..63c738c67273 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2434,8 +2434,6 @@ mt7530_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); - mt7530_pll_setup(priv); - /* Lower Tx driving for TRGMII path */ for (i = 0; i < NUM_TRGMII_CTRL; i++) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), @@ -2453,6 +2451,9 @@ mt7530_setup(struct dsa_switch *ds) priv->p6_interface = PHY_INTERFACE_MODE_NA; + if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ) + mt7530_pll_setup(priv); + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ From 7d51db455ca03e5270cc585a75a674abd063fa6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 8 Apr 2024 10:08:53 +0300 Subject: [PATCH 424/553] net: dsa: mt7530: fix enabling EEE on MT7531 switch on all boards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 06dfcd4098cfdc4d4577d94793a4f9125386da8b upstream. The commit 40b5d2f15c09 ("net: dsa: mt7530: Add support for EEE features") brought EEE support but did not enable EEE on MT7531 switch MACs. EEE is enabled on MT7531 switch MACs by pulling the LAN2LED0 pin low on the board (bootstrapping), unsetting the EEE_DIS bit on the trap register, or setting the internal EEE switch bit on the CORE_PLL_GROUP4 register. Thanks to SkyLake Huang (黃啟澤) from MediaTek for providing information on the internal EEE switch bit. There are existing boards that were not designed to pull the pin low. Because of that, the EEE status currently depends on the board design. The EEE_DIS bit on the trap pertains to the LAN2LED0 pin which is usually used to control an LED. Once the bit is unset, the pin will be low. That will make the active low LED turn on. The pin is controlled by the switch PHY. It seems that the PHY controls the pin in the way that it inverts the pin state. That means depending on the wiring of the LED connected to LAN2LED0 on the board, the LED may be on without an active link. To not cause this unwanted behaviour whilst enabling EEE on all boards, set the internal EEE switch bit on the CORE_PLL_GROUP4 register. My testing on MT7531 shows a certain amount of traffic loss when EEE is enabled. That said, I haven't come across a board that enables EEE. So enable EEE on the switch MACs but disable EEE advertisement on the switch PHYs. This way, we don't change the behaviour of the majority of the boards that have this switch. The mediatek-ge PHY driver already disables EEE advertisement on the switch PHYs but my testing shows that it is somehow enabled afterwards. Disabling EEE advertisement before the PHY driver initialises keeps it off. With this change, EEE can now be enabled using ethtool. Fixes: 40b5d2f15c09 ("net: dsa: mt7530: Add support for EEE features") Reviewed-by: Florian Fainelli Signed-off-by: Arınç ÜNAL Tested-by: Daniel Golle Reviewed-by: Daniel Golle Link: https://lore.kernel.org/r/20240408-for-net-mt7530-fix-eee-for-mt7531-mt7988-v3-1-84fdef1f008b@arinc9.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 17 ++++++++++++----- drivers/net/dsa/mt7530.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 63c738c67273..1aba0cf38630 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2675,18 +2675,25 @@ mt7531_setup(struct dsa_switch *ds) priv->p5_interface = PHY_INTERFACE_MODE_NA; priv->p6_interface = PHY_INTERFACE_MODE_NA; - /* Enable PHY core PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we provide - * our own mt7531_ind_mmd_phy_[read,write] to complete this - * function. + /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since + * phy_device has not yet been created provided for + * phy_[read,write]_mmd_indirect is called, we provide our own + * mt7531_ind_mmd_phy_[read,write] to complete this function. */ val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4); - val |= MT7531_PHY_PLL_BYPASS_MODE; + val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE; val &= ~MT7531_PHY_PLL_OFF; mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, CORE_PLL_GROUP4, val); + /* Disable EEE advertisement on the switch PHYs. */ + for (i = MT753X_CTRL_PHY_ADDR; + i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) { + mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV, + 0); + } + mt7531_setup_common(ds); /* Setup VLAN ID 0 for VLAN-unaware bridges */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 206966e46e81..6441e8d7f05d 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -673,6 +673,7 @@ enum mt7531_clk_skew { #define RG_SYSPLL_DDSFBK_EN BIT(12) #define RG_SYSPLL_BIAS_EN BIT(11) #define RG_SYSPLL_BIAS_LPF_EN BIT(10) +#define MT7531_RG_SYSPLL_DMY2 BIT(6) #define MT7531_PHY_PLL_OFF BIT(5) #define MT7531_PHY_PLL_BYPASS_MODE BIT(4) From b80ba648714e6d790d69610cf14656be222d0248 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 11 Apr 2024 23:02:15 +0900 Subject: [PATCH 425/553] ksmbd: fix slab-out-of-bounds in smb2_allocate_rsp_buf commit c119f4ede3fa90a9463f50831761c28f989bfb20 upstream. If ->ProtocolId is SMB2_TRANSFORM_PROTO_NUM, smb2 request size validation could be skipped. if request size is smaller than sizeof(struct smb2_query_info_req), slab-out-of-bounds read can happen in smb2_allocate_rsp_buf(). This patch allocate response buffer after decrypting transform request. smb3_decrypt_req() will validate transform request size and avoid slab-out-of-bound in smb2_allocate_rsp_buf(). Reported-by: Norbert Szetei Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/server.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 11b201e6ee44..63b01f7d9703 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ From 21ff9d7d223c5c19cb4334009e4c0c83a2f4d674 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 12 Apr 2024 09:45:00 +0900 Subject: [PATCH 426/553] ksmbd: validate request buffer size in smb2_allocate_rsp_buf() commit 17cf0c2794bdb6f39671265aa18aea5c22ee8c4a upstream. The response buffer should be allocated in smb2_allocate_rsp_buf before validating request. But the fields in payload as well as smb2 header is used in smb2_allocate_rsp_buf(). This patch add simple buffer size validation to avoid potencial out-of-bounds in request buffer. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index c02b1772cb80..34d88425434a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -534,6 +534,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || From 4687606d94deaccce06c64655ee82989acff5512 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Mon, 15 Apr 2024 15:12:48 +0200 Subject: [PATCH 427/553] ksmbd: clear RENAME_NOREPLACE before calling vfs_rename commit 4973b04d3ea577db80c501c5f14e68ec69fe1794 upstream. File overwrite case is explicitly handled, so it is not necessary to pass RENAME_NOREPLACE to vfs_rename. Clearing the flag fixes rename operations when the share is a ntfs-3g mount. The latter uses an older version of fuse with no support for flags in the ->rename op. Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index fe2c80ea2e47..a4c99ec38fac 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -746,10 +746,15 @@ retry: goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; From 67a877128bbe6a5c3282ad1ed8df5ba2cc93b55a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 19 Apr 2024 23:46:34 +0900 Subject: [PATCH 428/553] ksmbd: common: use struct_group_attr instead of struct_group for network_open_info commit 0268a7cc7fdc47d90b6c18859de7718d5059f6f1 upstream. 4byte padding cause the connection issue with the applications of MacOS. smb2_close response size increases by 4 bytes by padding, And the smb client of MacOS check it and stop the connection. This patch use struct_group_attr instead of struct_group for network_open_info to use __packed to avoid padding. Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/common/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index a3936ff53d9d..25383b11d01b 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -699,7 +699,7 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; From 0f7908a016c092cfdaa16d785fa5099d867bc1a3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Jan 2024 11:02:43 +0100 Subject: [PATCH 429/553] PCI/ASPM: Fix deadlock when enabling ASPM commit 1e560864159d002b453da42bd2c13a1805515a20 upstream. A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Helgaas Cc: # 6.7 [bhelgaas: backported to v6.1.y, which contains b9c370b61d73 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()""), a backport of f93e71aea6c6. This omits the drivers/pci/controller/dwc/pcie-qcom.c hunk that updates qcom_pcie_enable_aspm(), which was added by 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops"), which is not present in v6.1.87.] Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/bus.c | 49 +++++++++++++++++--------- drivers/pci/pci.c | 78 +++++++++++++++++++++++++++-------------- drivers/pci/pci.h | 4 +-- drivers/pci/pcie/aspm.c | 13 ++++--- include/linux/pci.h | 5 +++ 5 files changed, 100 insertions(+), 49 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index feafa378bf8e..aa2fba1c0f56 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -379,21 +379,8 @@ void pci_bus_add_devices(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_add_devices); -/** pci_walk_bus - walk devices on/under bus, calling callback. - * @top bus whose devices should be walked - * @cb callback to be called for each device found - * @userdata arbitrary pointer to be passed to callback. - * - * Walk the given bus, including any bridged devices - * on buses under this bus. Call the provided callback - * on each device found. - * - * We check the return of @cb each time. If it returns anything - * other than 0, we break out. - * - */ -void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata) +static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata, bool locked) { struct pci_dev *dev; struct pci_bus *bus; @@ -401,7 +388,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), int retval; bus = top; - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); next = top->devices.next; for (;;) { if (next == &bus->devices) { @@ -424,10 +412,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), if (retval) break; } - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); +} + +/** + * pci_walk_bus - walk devices on/under bus, calling callback. + * @top: bus whose devices should be walked + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * Walk the given bus, including any bridged devices + * on buses under this bus. Call the provided callback + * on each device found. + * + * We check the return of @cb each time. If it returns anything + * other than 0, we break out. + */ +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + __pci_walk_bus(top, cb, userdata, false); } EXPORT_SYMBOL_GPL(pci_walk_bus); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + lockdep_assert_held(&pci_bus_sem); + + __pci_walk_bus(top, cb, userdata, true); +} +EXPORT_SYMBOL_GPL(pci_walk_bus_locked); + struct pci_bus *pci_bus_get(struct pci_bus *bus) { if (bus) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5368a37154cf..67956bfebf87 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1258,6 +1258,7 @@ end: /** * pci_set_full_power_state - Put a PCI device into D0 and update its state * @dev: PCI device to power up + * @locked: whether pci_bus_sem is held * * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register * to confirm the state change, restore its BARs if they might be lost and @@ -1267,7 +1268,7 @@ end: * to D0, it is more efficient to use pci_power_up() directly instead of this * function. */ -static int pci_set_full_power_state(struct pci_dev *dev) +static int pci_set_full_power_state(struct pci_dev *dev, bool locked) { u16 pmcsr; int ret; @@ -1303,7 +1304,7 @@ static int pci_set_full_power_state(struct pci_dev *dev) } if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } @@ -1332,10 +1333,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) pci_walk_bus(bus, __pci_dev_set_current_state, &state); } +static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked) +{ + if (!bus) + return; + + if (locked) + pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state); + else + pci_walk_bus(bus, __pci_dev_set_current_state, &state); +} + /** * pci_set_low_power_state - Put a PCI device into a low-power state. * @dev: PCI device to handle. * @state: PCI power state (D1, D2, D3hot) to put the device into. + * @locked: whether pci_bus_sem is held * * Use the device's PCI_PM_CTRL register to put it into a low-power state. * @@ -1346,7 +1359,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) * 0 if device already is in the requested state. * 0 if device's power state has been successfully changed. */ -static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) +static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { u16 pmcsr; @@ -1400,29 +1413,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) pci_power_name(state)); if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } -/** - * pci_set_power_state - Set the power state of a PCI device - * @dev: PCI device to handle. - * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. - * - * Transition a device to a new power state, using the platform firmware and/or - * the device's PCI PM registers. - * - * RETURN VALUE: - * -EINVAL if the requested state is invalid. - * -EIO if device does not support PCI PM or its PM capabilities register has a - * wrong version, or device doesn't support the requested state. - * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. - * 0 if device already is in the requested state. - * 0 if the transition is to D3 but D3 is not supported. - * 0 if device's power state has been successfully changed. - */ -int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { int error; @@ -1446,7 +1442,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; if (state == PCI_D0) - return pci_set_full_power_state(dev); + return pci_set_full_power_state(dev, locked); /* * This device is quirked not to be put into D3, so don't put it in @@ -1460,16 +1456,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) * To put the device in D3cold, put it into D3hot in the native * way, then put it into D3cold using platform ops. */ - error = pci_set_low_power_state(dev, PCI_D3hot); + error = pci_set_low_power_state(dev, PCI_D3hot, locked); if (pci_platform_power_transition(dev, PCI_D3cold)) return error; /* Powering off a bridge may power off the whole hierarchy */ if (dev->current_state == PCI_D3cold) - pci_bus_set_current_state(dev->subordinate, PCI_D3cold); + __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked); } else { - error = pci_set_low_power_state(dev, state); + error = pci_set_low_power_state(dev, state, locked); if (pci_platform_power_transition(dev, state)) return error; @@ -1477,8 +1473,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * Transition a device to a new power state, using the platform firmware and/or + * the device's PCI PM registers. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. + * 0 if device already is in the requested state. + * 0 if the transition is to D3 but D3 is not supported. + * 0 if device's power state has been successfully changed. + */ +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + return __pci_set_power_state(dev, state, false); +} EXPORT_SYMBOL(pci_set_power_state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ + lockdep_assert_held(&pci_bus_sem); + + return __pci_set_power_state(dev, state, true); +} +EXPORT_SYMBOL(pci_set_power_state_locked); + #define PCI_EXP_SAVE_REGS 7 static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9950deeb047a..88576a22fecb 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -556,12 +556,12 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); -void pcie_aspm_pm_state_change(struct pci_dev *pdev); +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } -static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } +static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } #endif diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 2a3d973658da..cf4acea6610d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1055,8 +1055,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) up_read(&pci_bus_sem); } -/* @pdev: the root port or switch downstream port */ -void pcie_aspm_pm_state_change(struct pci_dev *pdev) +/* + * @pdev: the root port or switch downstream port + * @locked: whether pci_bus_sem is held + */ +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { struct pcie_link_state *link = pdev->link_state; @@ -1066,12 +1069,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev) * Devices changed PM state, we should recheck if latency * meets all functions' requirement */ - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); pcie_update_aspm_capable(link->root); pcie_config_aspm_path(link); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); } void pcie_aspm_powersave_config_link(struct pci_dev *pdev) diff --git a/include/linux/pci.h b/include/linux/pci.h index f5d89a4b811f..4da7411da9ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1383,6 +1383,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1553,6 +1554,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -1884,6 +1887,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, From f2295faba5e8249ae4082791bfc1664c88fff83a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 27 Apr 2024 17:07:18 +0200 Subject: [PATCH 430/553] Linux 6.1.88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240423213853.356988651@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Ron Economos Tested-by: Florian Fainelli Tested-by: Yann Sionneau Tested-by: Mateusz Jończyk Tested-by: kernelci.org bot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e46a57006a34..c73cb678fb9a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 87 +SUBLEVEL = 88 EXTRAVERSION = NAME = Curry Ramen From 8a5291736e706cc3df06e2427bb7fa087dbfb0f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2024 12:21:58 +0200 Subject: [PATCH 431/553] Revert "ASoC: ti: Convert Pandora ASoC to GPIO descriptors" This reverts commit 0f4048e1a0c6e9d3d31ce5b684600fd137cebfca which is commit 319e6ac143b9e9048e527ab9dd2aabb8fdf3d60f upstream. It breaks the 6.1.y build, so needs to be reverted. Cc: Linus Walleij Cc: Jarkko Nikula Cc: Mark Brown Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/pdata-quirks.c | 10 ----- sound/soc/ti/omap3pandora.c | 63 +++++++++++++++++++----------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 44da1e14a374..9deba798cc91 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -257,19 +257,9 @@ static struct platform_device pandora_backlight = { .id = -1, }; -static struct gpiod_lookup_table pandora_soc_audio_gpios = { - .dev_id = "soc-audio", - .table = { - GPIO_LOOKUP("gpio-112-127", 6, "dac", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-0-15", 14, "amp", GPIO_ACTIVE_HIGH), - { } - }, -}; - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - gpiod_add_lookup_table(&pandora_soc_audio_gpios); } #endif /* CONFIG_ARCH_OMAP3 */ diff --git a/sound/soc/ti/omap3pandora.c b/sound/soc/ti/omap3pandora.c index fa92ed97dfe3..a287e9747c2a 100644 --- a/sound/soc/ti/omap3pandora.c +++ b/sound/soc/ti/omap3pandora.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include @@ -21,11 +21,12 @@ #include "omap-mcbsp.h" +#define OMAP3_PANDORA_DAC_POWER_GPIO 118 +#define OMAP3_PANDORA_AMP_POWER_GPIO 14 + #define PREFIX "ASoC omap3pandora: " static struct regulator *omap3pandora_dac_reg; -static struct gpio_desc *dac_power_gpio; -static struct gpio_desc *amp_power_gpio; static int omap3pandora_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -77,9 +78,9 @@ static int omap3pandora_dac_event(struct snd_soc_dapm_widget *w, return ret; } mdelay(1); - gpiod_set_value(dac_power_gpio, 1); + gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 1); } else { - gpiod_set_value(dac_power_gpio, 0); + gpio_set_value(OMAP3_PANDORA_DAC_POWER_GPIO, 0); mdelay(1); regulator_disable(omap3pandora_dac_reg); } @@ -91,9 +92,9 @@ static int omap3pandora_hp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { if (SND_SOC_DAPM_EVENT_ON(event)) - gpiod_set_value(amp_power_gpio, 1); + gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 1); else - gpiod_set_value(amp_power_gpio, 0); + gpio_set_value(OMAP3_PANDORA_AMP_POWER_GPIO, 0); return 0; } @@ -228,10 +229,35 @@ static int __init omap3pandora_soc_init(void) pr_info("OMAP3 Pandora SoC init\n"); + ret = gpio_request(OMAP3_PANDORA_DAC_POWER_GPIO, "dac_power"); + if (ret) { + pr_err(PREFIX "Failed to get DAC power GPIO\n"); + return ret; + } + + ret = gpio_direction_output(OMAP3_PANDORA_DAC_POWER_GPIO, 0); + if (ret) { + pr_err(PREFIX "Failed to set DAC power GPIO direction\n"); + goto fail0; + } + + ret = gpio_request(OMAP3_PANDORA_AMP_POWER_GPIO, "amp_power"); + if (ret) { + pr_err(PREFIX "Failed to get amp power GPIO\n"); + goto fail0; + } + + ret = gpio_direction_output(OMAP3_PANDORA_AMP_POWER_GPIO, 0); + if (ret) { + pr_err(PREFIX "Failed to set amp power GPIO direction\n"); + goto fail1; + } + omap3pandora_snd_device = platform_device_alloc("soc-audio", -1); if (omap3pandora_snd_device == NULL) { pr_err(PREFIX "Platform device allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto fail1; } platform_set_drvdata(omap3pandora_snd_device, &snd_soc_card_omap3pandora); @@ -242,20 +268,6 @@ static int __init omap3pandora_soc_init(void) goto fail2; } - dac_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, - "dac", GPIOD_OUT_LOW); - if (IS_ERR(dac_power_gpio)) { - ret = PTR_ERR(dac_power_gpio); - goto fail3; - } - - amp_power_gpio = devm_gpiod_get(&omap3pandora_snd_device->dev, - "amp", GPIOD_OUT_LOW); - if (IS_ERR(amp_power_gpio)) { - ret = PTR_ERR(amp_power_gpio); - goto fail3; - } - omap3pandora_dac_reg = regulator_get(&omap3pandora_snd_device->dev, "vcc"); if (IS_ERR(omap3pandora_dac_reg)) { pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", @@ -271,7 +283,10 @@ fail3: platform_device_del(omap3pandora_snd_device); fail2: platform_device_put(omap3pandora_snd_device); - +fail1: + gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); +fail0: + gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); return ret; } module_init(omap3pandora_soc_init); @@ -280,6 +295,8 @@ static void __exit omap3pandora_soc_exit(void) { regulator_put(omap3pandora_dac_reg); platform_device_unregister(omap3pandora_snd_device); + gpio_free(OMAP3_PANDORA_AMP_POWER_GPIO); + gpio_free(OMAP3_PANDORA_DAC_POWER_GPIO); } module_exit(omap3pandora_soc_exit); From dcbc050cb0d304c3427d6583384eebcaf0e3caee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2024 12:32:48 +0200 Subject: [PATCH 432/553] Linux 6.1.89 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c73cb678fb9a..a0472e1cf715 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 88 +SUBLEVEL = 89 EXTRAVERSION = NAME = Curry Ramen From e24e1651908bbabb38dbd89d964ae97da700701a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Apr 2024 12:05:07 -0300 Subject: [PATCH 433/553] smb: client: fix rename(2) regression against samba [ Upstream commit 18d86965e31f9be4d477da0744a7cdc9815858de ] After commit 2c7d399e551c ("smb: client: reuse file lease key in compound operations") the client started reusing lease keys for rename, unlink and set path size operations to prevent it from breaking its own leases and thus causing unnecessary lease breaks to same connection. The implementation relies on positive dentries and cifsInodeInfo::lease_granted to decide whether reusing lease keys for the compound requests. cifsInodeInfo::lease_granted was introduced by commit 0ab95c2510b6 ("Defer close only when lease is enabled.") to indicate whether lease caching is granted for a specific file, but that can only happen until file is open, so cifsInodeInfo::lease_granted was left uninitialised in ->alloc_inode and then client started sending random lease keys for files that hadn't any leases. This fixes the following test case against samba: mount.cifs //srv/share /mnt/1 -o ...,nosharesock mount.cifs //srv/share /mnt/2 -o ...,nosharesock touch /mnt/1/foo; tail -f /mnt/1/foo & pid=$! mv /mnt/2/foo /mnt/2/bar # fails with -EIO kill $pid Fixes: 0ab95c2510b6 ("Defer close only when lease is enabled.") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 0a79771c8f33..f0a3336ffb6c 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -387,6 +387,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. */ cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->lease_granted = false; cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; From 4f83ca4c7aa60447565108cd09a400bb3b4898c4 Mon Sep 17 00:00:00 2001 From: Takayuki Nagata Date: Mon, 15 Apr 2024 16:47:49 +0900 Subject: [PATCH 434/553] cifs: reinstate original behavior again for forceuid/forcegid [ Upstream commit 77d8aa79ecfb209308e0644c02f655122b31def7 ] forceuid/forcegid should be enabled by default when uid=/gid= options are specified, but commit 24e0a1eff9e2 ("cifs: switch to new mount api") changed the behavior. Due to the change, a mounted share does not show intentional uid/gid for files and directories even though uid=/gid= options are specified since forceuid/forcegid are not enabled. This patch reinstates original behavior that overrides uid/gid with specified uid/gid by the options. Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Signed-off-by: Takayuki Nagata Acked-by: Paulo Alcantara (Red Hat) Acked-by: Ronnie Sahlberg Acked-by: Tom Talpey Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fs_context.c | 12 ++++++++++++ fs/smb/client/fs_context.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 4d5302b58b53..ca39d01077cd 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -676,6 +676,16 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* set the port that we got earlier */ cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + if (ctx->uid_specified && !ctx->forceuid_specified) { + ctx->override_uid = 1; + pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n"); + } + + if (ctx->gid_specified && !ctx->forcegid_specified) { + ctx->override_gid = 1; + pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n"); + } + if (ctx->override_uid && !ctx->uid_specified) { ctx->override_uid = 0; pr_notice("ignoring forceuid mount option specified with no uid= option\n"); @@ -923,12 +933,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->override_uid = 0; else ctx->override_uid = 1; + ctx->forceuid_specified = true; break; case Opt_forcegid: if (result.negated) ctx->override_gid = 0; else ctx->override_gid = 1; + ctx->forcegid_specified = true; break; case Opt_perm: if (result.negated) diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 26093f54d3e6..319a91b7f670 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -154,6 +154,8 @@ enum cifs_param { }; struct smb3_fs_context { + bool forceuid_specified; + bool forcegid_specified; bool uid_specified; bool cruid_specified; bool gid_specified; From 6b0ac25f367fb177f557cf684357e2233138fb68 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 6 Mar 2024 00:44:04 +0000 Subject: [PATCH 435/553] HID: intel-ish-hid: ipc: Fix dev_err usage with uninitialized dev->devc [ Upstream commit 92826905ae340b7f2b25759a06c8c60bfc476b9f ] The variable dev->devc in ish_dev_init was utilized by dev_err before it was properly assigned. To rectify this, the assignment of dev->devc has been moved to immediately follow memory allocation. Without this change "(NULL device *)" is printed for device information. Fixes: 8ae2f2b0a284 ("HID: intel-ish-hid: ipc: Fix potential use-after-free in work function") Fixes: ae02e5d40d5f ("HID: intel-ish-hid: ipc layer") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ipc/ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index a49c6affd7c4..dd5fc60874ba 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -948,6 +948,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) if (!dev) return NULL; + dev->devc = &pdev->dev; ishtp_device_init(dev); init_waitqueue_head(&dev->wait_hw_ready); @@ -983,7 +984,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) } dev->ops = &ish_hw_ops; - dev->devc = &pdev->dev; dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr); return dev; } From 526facda6194c481cb10c80523cae50b7dbf018e Mon Sep 17 00:00:00 2001 From: Yaraslau Furman Date: Wed, 3 Apr 2024 19:54:24 +0300 Subject: [PATCH 436/553] HID: logitech-dj: allow mice to use all types of reports [ Upstream commit 21f28a7eb78dea6c59be6b0a5e0b47bf3d25fcbb ] You can bind whatever action you want to the mouse's reprogrammable buttons using Windows application. Allow Linux to receive multimedia keycodes. Fixes: 3ed224e273ac ("HID: logitech-dj: Fix 064d:c52f receiver support") Signed-off-by: Yaraslau Furman Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-logitech-dj.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 08768e5acced..57697605b2e2 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -965,9 +965,7 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, } break; case REPORT_TYPE_MOUSE: - workitem->reports_supported |= STD_MOUSE | HIDPP; - if (djrcv_dev->type == recvr_type_mouse_only) - workitem->reports_supported |= MULTIMEDIA; + workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA; break; } } From 9ab1d84bdb12a52387810f1554ba66c0f0cbd25d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 15:24:35 +0300 Subject: [PATCH 437/553] arm64: dts: rockchip: set PHY address of MT7531 switch to 0x1f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a2ac2a1b02590a22a236c43c455f421cdede45f5 ] The MT7531 switch listens on PHY address 0x1f on an MDIO bus. I've got two findings that support this. There's no bootstrapping option to change the PHY address of the switch. The Linux driver hardcodes 0x1f as the PHY address of the switch. So the reg property on the device tree is currently ignored by the Linux driver. Therefore, describe the correct PHY address on Banana Pi BPI-R2 Pro that has this switch. Signed-off-by: Arınç ÜNAL Fixes: c1804463e5c6 ("arm64: dts: rockchip: Add mt7531 dsa node to BPI-R2-Pro board") Link: https://lore.kernel.org/r/20240314-for-rockchip-mt7531-phy-address-v1-1-743b5873358f@arinc9.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 26d7fda275ed..7952a1431436 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -521,9 +521,9 @@ #address-cells = <1>; #size-cells = <0>; - switch@0 { + switch@1f { compatible = "mediatek,mt7531"; - reg = <0>; + reg = <0x1f>; ports { #address-cells = <1>; From cb5b05e61968247a9ef0c71b306c888d3b2993e9 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:07 +0100 Subject: [PATCH 438/553] arm64: dts: rockchip: enable internal pull-up on Q7_USB_ID for RK3399 Puma [ Upstream commit e6b1168f37e3f86d9966276c5a3fff9eb0df3e5f ] The Q7_USB_ID has a diode used as a level-shifter, and is used as an input pin. The SoC default for this pin is a pull-up, which is correct but the pinconf in the introducing commit missed that, so let's fix this oversight. Fixes: ed2c66a95c0c ("arm64: dts: rockchip: fix rk3399-puma-haikou USB OTG mode") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-1-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index aa3e21bd6c8f..fee2cc035613 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -443,7 +443,7 @@ usb3 { usb3_id: usb3-id { rockchip,pins = - <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; From aa1af71deedaf08fd400aed2f38a8c57557ea334 Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:43 +0100 Subject: [PATCH 439/553] arm64: dts: rockchip: fix alphabetical ordering RK3399 puma [ Upstream commit f0abb4b2c7acf3c3e4130dc3f54cd90cf2ae62bc ] Nodes overridden by their reference should be ordered alphabetically to make it easier to read the DTS. pinctrl node is defined in the wrong location so let's reorder it. Signed-off-by: Iskander Amara Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308085243.69903-2-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner Stable-dep-of: 945a7c857091 ("arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index fee2cc035613..a060419bca90 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -401,15 +401,6 @@ gpio1830-supply = <&vcc_1v8>; }; -&pmu_io_domains { - status = "okay"; - pmu1830-supply = <&vcc_1v8>; -}; - -&pwm2 { - status = "okay"; -}; - &pinctrl { i2c8 { i2c8_xfer_a: i2c8-xfer { @@ -448,6 +439,15 @@ }; }; +&pmu_io_domains { + status = "okay"; + pmu1830-supply = <&vcc_1v8>; +}; + +&pwm2 { + status = "okay"; +}; + &sdhci { /* * Signal integrity isn't great at 200MHz but 100MHz has proven stable From 076ff06a1e3a9d9b4f1ab32921e2af73357c045a Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:08 +0100 Subject: [PATCH 440/553] arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma [ Upstream commit 945a7c8570916650a415757d15d83e0fa856a686 ] The PCIE_WAKE# has a diode used as a level-shifter, and is used as an input pin. While the SoC default is to enable the pull-up, the core rk3399 pinconf for this pin opted for pull-none. So as to not disturb the behaviour of other boards which may rely on pull-none instead of pull-up, set the needed pull-up only for RK3399 Puma. Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-2-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index a060419bca90..a77f922107c2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -401,6 +401,11 @@ gpio1830-supply = <&vcc_1v8>; }; +&pcie_clkreqn_cpm { + rockchip,pins = + <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; +}; + &pinctrl { i2c8 { i2c8_xfer_a: i2c8-xfer { From 475816446f6037a10f50ae79af737783e0bc9a8c Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 1 Apr 2024 00:20:56 +0200 Subject: [PATCH 441/553] arm64: dts: rockchip: Remove unsupported node from the Pinebook Pro dts [ Upstream commit 43853e843aa6c3d47ff2b0cce898318839483d05 ] Remove a redundant node from the Pine64 Pinebook Pro dts, which is intended to provide a value for the delay in PCI Express enumeration, but that isn't supported without additional out-of-tree kernel patches. There were already efforts to upstream those kernel patches, because they reportedly make some PCI Express cards (such as LSI SAS HBAs) usable in Pine64 RockPro64 (which is also based on the RK3399); otherwise, those PCI Express cards fail to enumerate. However, providing the required background and explanations proved to be a tough nut to crack, which is the reason why those patches remain outside of the kernel mainline for now. If those out-of-tree patches eventually become upstreamed, the resulting device-tree changes will almost surely belong to the RK3399 SoC dtsi. Also, the above-mentioned unusable-without-out-of-tree-patches PCI Express devices are in all fairness not usable in a Pinebook Pro without some extensive hardware modifications, which is another reason to delete this redundant node. When it comes to the Pinebook Pro, only M.2 NVMe SSDs can be installed out of the box (using an additional passive adapter PCB sold separately by Pine64), which reportedly works fine with no additional patches. Fixes: 5a65505a6988 ("arm64: dts: rockchip: Add initial support for Pinebook Pro") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/0f82c3f97cb798d012270d13b34d8d15305ef293.1711923520.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 194e48c755f6..a51e8d0493ca 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -789,7 +789,6 @@ }; &pcie0 { - bus-scan-delay-ms = <1000>; ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; From 2f83d4763ac969b40c19adec23fbe610ec5a751d Mon Sep 17 00:00:00 2001 From: Ikjoon Jang Date: Fri, 23 Feb 2024 17:11:21 +0800 Subject: [PATCH 442/553] arm64: dts: mediatek: mt8183: Add power-domains properity to mfgcfg [ Upstream commit 1781f2c461804c0123f59afc7350e520a88edffb ] mfgcfg clock is under MFG_ASYNC power domain. Fixes: e526c9bc11f8 ("arm64: dts: Add Mediatek SoC MT8183 and evaluation board dts and Makefile") Fixes: 37fb78b9aeb7 ("arm64: dts: mediatek: Add mt8183 power domains controller") Signed-off-by: Weiyi Lu Signed-off-by: Ikjoon Jang Reviewed-by: Enric Balletbo i Serra Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240223091122.2430037-1-wenst@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index d5d9b954c449..2147e152683b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1554,6 +1554,7 @@ compatible = "mediatek,mt8183-mfgcfg", "syscon"; reg = <0 0x13000000 0 0x1000>; #clock-cells = <1>; + power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>; }; gpu: gpu@13040000 { From 18548e2ab95449e1a34e20e658c2457f433aa0e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:28 -0500 Subject: [PATCH 443/553] arm64: dts: mediatek: mt8192: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 00bcc8810d9dd69d3899a4189e2f3964f263a600 ] Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b4b75bac952b ("arm64: dts: mt8192: Add display nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-1-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 4ed8a0f18758..7ecba8c7262d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1240,6 +1240,7 @@ reg = <0 0x14001000 0 0x1000>; interrupts = ; clocks = <&mmsys CLK_MM_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x1000 0x1000>; mediatek,gce-events = , ; power-domains = <&spm MT8192_POWER_DOMAIN_DISP>; From 5bcfc5337334f44518b1d24b0d0bd155558d15a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:29 -0500 Subject: [PATCH 444/553] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to vpp/vdosys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 96b0c1528ef41fe754f5d1378b1db6c098a2e33f ] Add the missing mediatek,gce-client-reg property to the vppsys and vdosys nodes to allow them to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: 6aa5b46d1755 ("arm64: dts: mt8195: Add vdosys and vppsys clock nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-2-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 414cbe345127..8f33b3226435 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -1492,6 +1492,7 @@ compatible = "mediatek,mt8195-vppsys0"; reg = <0 0x14000000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0 0x1000>; }; smi_sub_common_vpp0_vpp1_2x1: smi@14010000 { @@ -1597,6 +1598,7 @@ compatible = "mediatek,mt8195-vppsys1"; reg = <0 0x14f00000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0 0x1000>; }; larb5: larb@14f02000 { @@ -1982,6 +1984,7 @@ reg = <0 0x1c01a000 0 0x1000>; mboxes = <&gce0 0 CMDQ_THR_PRIO_4>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0xa000 0x1000>; }; larb20: larb@1b010000 { From e8ac4490db15ca97349f64d0241541cb98ef4870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:30 -0500 Subject: [PATCH 445/553] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3b129949184a1251e6a42db714f6d68b75fabedd ] Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b852ee68fd72 ("arm64: dts: mt8195: Add display node for vdosys0") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-3-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 8f33b3226435..bdf002e9cece 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2088,6 +2088,7 @@ interrupts = ; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>; clocks = <&vdosys0 CLK_VDO0_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0x6000 0x1000>; mediatek,gce-events = ; }; From 942debbea563c4ccfeeeed21a304c9b84b84090d Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:02 +0800 Subject: [PATCH 446/553] arm64: dts: mediatek: mt8192-asurada: Update min voltage constraint for MT6315 [ Upstream commit 374a7c6400e314458178255a63c37d6347845092 ] Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 606250 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 3183cb62b033 ("arm64: dts: mediatek: asurada: Add SPMI regulators") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-2-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index c6080af1e4a3..0814ed6a7272 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -903,7 +903,7 @@ mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -913,7 +913,7 @@ mt6315_6_vbuck3: vbuck3 { regulator-compatible = "vbuck3"; regulator-name = "Vlcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -930,7 +930,7 @@ mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <606250>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <800000>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; From 818f56a8b32fd734c5ab36bdc0a8985e939060f8 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:03 +0800 Subject: [PATCH 447/553] arm64: dts: mediatek: mt8195-cherry: Update min voltage constraint for MT6315 [ Upstream commit e9a6b8b5c61350535c7eb5ea9b2dde0d5745bd1b ] Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 625000 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 260c04d425eb ("arm64: dts: mediatek: cherry: Enable MT6315 regulators on SPMI bus") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-3-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index 4b8a1c462906..9180a73db066 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -845,7 +845,7 @@ mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; @@ -863,7 +863,7 @@ mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <625000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; From ce782b5a748531abd91bfc82333c872446298124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:47 +0100 Subject: [PATCH 448/553] arm64: dts: mediatek: mt7622: fix clock controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3ba5a61594347ab46e7c2cff6cd63ea0f1282efb ] 1. Drop unneeded "syscon"s (bindings were updated recently) 2. Use "clock-controller" in nodenames 3. Add missing "#clock-cells" Fixes: d7167881e03e ("arm64: dts: mt7622: add clock controller device nodes") Fixes: e9b65ecb7c30 ("arm64: dts: mediatek: mt7622: introduce nodes for Wireless Ethernet Dispatch") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-2-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 7bb316922a3a..c1747483350e 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -282,16 +282,14 @@ }; }; - apmixedsys: apmixedsys@10209000 { - compatible = "mediatek,mt7622-apmixedsys", - "syscon"; + apmixedsys: clock-controller@10209000 { + compatible = "mediatek,mt7622-apmixedsys"; reg = <0 0x10209000 0 0x1000>; #clock-cells = <1>; }; - topckgen: topckgen@10210000 { - compatible = "mediatek,mt7622-topckgen", - "syscon"; + topckgen: clock-controller@10210000 { + compatible = "mediatek,mt7622-topckgen"; reg = <0 0x10210000 0 0x1000>; #clock-cells = <1>; }; @@ -734,9 +732,8 @@ power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>; }; - ssusbsys: ssusbsys@1a000000 { - compatible = "mediatek,mt7622-ssusbsys", - "syscon"; + ssusbsys: clock-controller@1a000000 { + compatible = "mediatek,mt7622-ssusbsys"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -793,9 +790,8 @@ }; }; - pciesys: pciesys@1a100800 { - compatible = "mediatek,mt7622-pciesys", - "syscon"; + pciesys: clock-controller@1a100800 { + compatible = "mediatek,mt7622-pciesys"; reg = <0 0x1a100800 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -921,12 +917,13 @@ }; }; - hifsys: syscon@1af00000 { - compatible = "mediatek,mt7622-hifsys", "syscon"; + hifsys: clock-controller@1af00000 { + compatible = "mediatek,mt7622-hifsys"; reg = <0 0x1af00000 0 0x70>; + #clock-cells = <1>; }; - ethsys: syscon@1b000000 { + ethsys: clock-controller@1b000000 { compatible = "mediatek,mt7622-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; From da3c0740f0aa774c84ac985ed180f76771d0df15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:48 +0100 Subject: [PATCH 449/553] arm64: dts: mediatek: mt7622: fix IR nodename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 800dc93c3941e372c94278bf4059e6e82f60bd66 ] Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: cir@10009000: $nodename:0: 'cir@10009000' does not match '^ir(-receiver)?(@[a-f0-9]+)?$' from schema $id: http://devicetree.org/schemas/media/mediatek,mt7622-cir.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-3-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index c1747483350e..87f692a041a2 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -251,7 +251,7 @@ clock-names = "hif_sel"; }; - cir: cir@10009000 { + cir: ir-receiver@10009000 { compatible = "mediatek,mt7622-cir"; reg = <0 0x10009000 0 0x1000>; interrupts = ; From d078de867493c5ad05a3189c317f727299d9d69f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:49 +0100 Subject: [PATCH 450/553] arm64: dts: mediatek: mt7622: fix ethernet controller "compatible" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 208add29ce5b7291f6c466e4dfd9cbf61c72888e ] Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: ethernet@1b100000: compatible: ['mediatek,mt7622-eth', 'mediatek,mt2701-eth', 'syscon'] is too long from schema $id: http://devicetree.org/schemas/net/mediatek,net.yaml# (and other complains about wrong clocks). Fixes: 5f599b3a0bb8 ("arm64: dts: mt7622: add ethernet device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-4-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 87f692a041a2..5b7be71afa5c 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -963,9 +963,7 @@ }; eth: ethernet@1b100000 { - compatible = "mediatek,mt7622-eth", - "mediatek,mt2701-eth", - "syscon"; + compatible = "mediatek,mt7622-eth"; reg = <0 0x1b100000 0 0x20000>; interrupts = , , From 1aea205a4226c8ef60bddb805cbf9a347b1111f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:50 +0100 Subject: [PATCH 451/553] arm64: dts: mediatek: mt7622: drop "reset-names" from thermal block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ecb5b0034f5bcc35003b4b965cf50c6e98316e79 ] Binding doesn't specify "reset-names" property and Linux driver also doesn't use it. Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: thermal@1100b000: Unevaluated properties are not allowed ('reset-names' was unexpected) from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-5-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 5b7be71afa5c..f8a32006885b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -512,7 +512,6 @@ <&pericfg CLK_PERI_AUXADC_PD>; clock-names = "therm", "auxadc"; resets = <&pericfg MT7622_PERI_THERM_SW_RST>; - reset-names = "therm"; mediatek,auxadc = <&auxadc>; mediatek,apmixedsys = <&apmixedsys>; nvmem-cells = <&thermal_calibration>; From af45b5bc30f0dc2caf0a3d6a7e40f8da288d0fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 1 Mar 2024 08:47:41 +0100 Subject: [PATCH 452/553] arm64: dts: mediatek: mt2712: fix validation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3baac7291effb501c4d52df7019ebf52011e5772 ] 1. Fixup infracfg clock controller binding It also acts as reset controller so #reset-cells is required. 2. Use -pins suffix for pinctrl This fixes: arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: syscon@10001000: '#reset-cells' is a required property from schema $id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml# arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: pinctrl@1000b000: 'eth_default', 'eth_sleep', 'usb0_iddig', 'usb1_iddig' do not match any of the regexes: 'pinctrl-[0-9]+', 'pins$' from schema $id: http://devicetree.org/schemas/pinctrl/mediatek,mt65xx-pinctrl.yaml# Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240301074741.8362-1-zajec5@gmail.com [Angelo: Added Fixes tags] Fixes: 5d4839709c8e ("arm64: dts: mt2712: Add clock controller device nodes") Fixes: 1724f4cc5133 ("arm64: dts: Add USB3 related nodes for MT2712") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt2712-evb.dts | 8 ++++---- arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts index d31a194124c9..03fd9df16999 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts @@ -128,7 +128,7 @@ }; &pio { - eth_default: eth_default { + eth_default: eth-default-pins { tx_pins { pinmux = , , @@ -155,7 +155,7 @@ }; }; - eth_sleep: eth_sleep { + eth_sleep: eth-sleep-pins { tx_pins { pinmux = , , @@ -181,14 +181,14 @@ }; }; - usb0_id_pins_float: usb0_iddig { + usb0_id_pins_float: usb0-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; }; }; - usb1_id_pins_float: usb1_iddig { + usb1_id_pins_float: usb1-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 1ac0b2cf3d40..fde2b165f55d 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -249,10 +249,11 @@ #clock-cells = <1>; }; - infracfg: syscon@10001000 { + infracfg: clock-controller@10001000 { compatible = "mediatek,mt2712-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; pericfg: syscon@10003000 { From 0277e73e8ea50e04888ad1b455ea34e3aee773c2 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Tue, 5 Mar 2024 15:32:18 +0100 Subject: [PATCH 453/553] arm64: dts: rockchip: regulator for sd needs to be always on for BPI-R2Pro [ Upstream commit 433d54818f64a2fe0562f8c04c7a81f562368515 ] With default dts configuration for BPI-R2Pro, the regulator for sd card is powered off when reboot is commanded, and the only solution to detect the sd card again, and therefore, allow rebooting from there, is to do a hardware reset. Configure the regulator for sd to be always on for BPI-R2Pro in order to avoid this issue. Fixes: f901aaadaa2a ("arm64: dts: rockchip: Add Bananapi R2 Pro") Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20240305143222.189413-1-jtornosm@redhat.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 7952a1431436..856fe4b66a0b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -412,6 +412,8 @@ vccio_sd: LDO_REG5 { regulator-name = "vccio_sd"; + regulator-always-on; + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; From bab058e31a92f489557e3a00e16d34853ed38ab4 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 29 Mar 2024 10:36:50 +0000 Subject: [PATCH 454/553] ARC: [plat-hsdk]: Remove misplaced interrupt-cells property [ Upstream commit 61231eb8113ce47991f35024f9c20810b37996bf ] "gmac" node stands for just an ordinary Ethernet controller, which is by no means a provider of interrupts, i.e. it doesn't serve as an interrupt controller, thus "#interrupt-cells" property doesn't belong to it and so we remove it. Fixes: ------------>8------------ DTC arch/arc/boot/dts/hsdk.dtb arch/arc/boot/dts/hsdk.dts:207.23-235.5: Warning (interrupt_provider): /soc/ethernet@8000: '#interrupt-cells' found, but node is not an interrupt provider arch/arc/boot/dts/hsdk.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' ------------>8------------ Reported-by: Vineet Gupta Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/boot/dts/hsdk.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 6691f4255077..41b980df862b 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -205,7 +205,6 @@ }; gmac: ethernet@8000 { - #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; interrupts = <10>; From d20e3beb83da0f3a09239a833f85bccecfe30052 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 15 Apr 2024 11:54:43 +0300 Subject: [PATCH 455/553] wifi: iwlwifi: mvm: remove old PASN station when adding a new one [ Upstream commit dbfff5bf9292714f02ace002fea8ce6599ea1145 ] If a PASN station is added, and an old PASN station already exists for the same mac address, remove the old station before adding the new one. Keeping the old station caueses old security context to be used in measurements. Fixes: 0739a7d70e00 ("iwlwifi: mvm: initiator: add option for adding a PASN responder") Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://msgid.link/20240415114847.ef3544a416f2.I4e8c7c8ca22737f4f908ae5cd4fc0b920c703dd3@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index 8c5b97fb1941..5b0b4bb2bb68 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -48,6 +48,8 @@ int iwl_mvm_ftm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (!pasn) return -ENOBUFS; + iwl_mvm_ftm_remove_pasn_sta(mvm, addr); + pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher); switch (pasn->cipher) { From 9b9c4adad6d132e1d591d21d6b618a18aa25da80 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Apr 2024 11:54:44 +0300 Subject: [PATCH 456/553] wifi: iwlwifi: mvm: return uid from iwl_mvm_build_scan_cmd [ Upstream commit bada85a3f584763deadd201147778c3e791d279c ] This function is supposed to return a uid on success, and an errno in failure. But it currently returns the return value of the specific cmd version handler, which in turn returns 0 on success and errno otherwise. This means that on success, iwl_mvm_build_scan_cmd will return 0 regardless if the actual uid. Fix this by returning the uid if the handler succeeded. Fixes: 687db6ff5b70 ("iwlwifi: scan: make new scan req versioning flow") Signed-off-by: Miri Korenblit Reviewed-by: Ilan Peer Link: https://msgid.link/20240415114847.5e2d602b3190.I4c4931021be74a67a869384c8f8ee7463e0c7857@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index acd8803dbcdd..b20d64dbba1a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2650,7 +2650,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm, if (ver_handler->version != scan_ver) continue; - return ver_handler->handler(mvm, vif, params, type, uid); + err = ver_handler->handler(mvm, vif, params, type, uid); + return err ? : uid; } err = iwl_mvm_scan_umac(mvm, vif, params, type, uid); From 9064163f1cf3cb90150ec1a94bfb349fe58630b1 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Thu, 18 Apr 2024 15:29:08 +0200 Subject: [PATCH 457/553] vxlan: drop packets from invalid src-address [ Upstream commit f58f45c1e5b92975e91754f5407250085a6ae7cf ] The VXLAN driver currently does not check if the inner layer2 source-address is valid. In case source-address snooping/learning is enabled, a entry in the FDB for the invalid address is created with the layer3 address of the tunnel endpoint. If the frame happens to have a non-unicast address set, all this non-unicast traffic is subsequently not flooded to the tunnel network but sent to the learnt host in the FDB. To make matters worse, this FDB entry does not expire. Apply the same filtering for packets as it is done for bridges. This not only drops these invalid packets but avoids them from being learnt into the FDB. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Suggested-by: Ido Schimmel Signed-off-by: David Bauer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 619dd71c9d75..fbd36dff9ec2 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1662,6 +1662,10 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) return false; + /* Ignore packets from invalid src-address */ + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + return false; + /* Get address from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; From 3f7ecad54c01d1c0f27d546f968de69fa0da1efa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:06 +0200 Subject: [PATCH 458/553] mlxsw: core: Unregister EMAD trap using FORWARD action [ Upstream commit 976c44af48141cd8595601c0af2a19a43c5b228b ] The device's manual (PRM - Programmer's Reference Manual) classifies the trap that is used to deliver EMAD responses as an "event trap". Among other things, it means that the only actions that can be associated with the trap are TRAP and FORWARD (NOP). Currently, during driver de-initialization the driver unregisters the trap by setting its action to DISCARD, which violates the above guideline. Future firmware versions will prevent such misuses by returning an error. This does not prevent the driver from working, but an error will be printed to the kernel log during module removal / devlink reload: mlxsw_spectrum 0000:03:00.0: Reg cmd access status failed (status=7(bad parameter)) mlxsw_spectrum 0000:03:00.0: Reg cmd access failed (reg_id=7003(hpkt),type=write) Suppress the error message by aligning the driver to the manual and use a FORWARD (NOP) action when unregistering the trap. Fixes: 4ec14b7634b2 ("mlxsw: Add interface to access registers and process events") Cc: Jiri Pirko Cc: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/753a89e14008fde08cb4a2c1e5f537b81d8eb2d6.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e2a985ec2c76..f36a416ffcfe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -792,7 +792,7 @@ free_skb: static const struct mlxsw_listener mlxsw_emad_rx_listener = MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false, - EMAD, DISCARD); + EMAD, FORWARD); static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { From d3c4b14c8a99dc41429db4b56b1dc104600cc9d8 Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Thu, 4 Apr 2024 15:38:23 +0300 Subject: [PATCH 459/553] ARM: dts: microchip: at91-sama7g5ek: Replace regulator-suspend-voltage with the valid property [ Upstream commit e027b71762e84ee9d4ba9ad5401b956b9e83ed2a ] By checking the pmic node with microchip,mcp16502.yaml# 'regulator-suspend-voltage' does not match any of the regexes 'pinctrl-[0-9]+' from schema microchip,mcp16502.yaml# which inherits regulator.yaml#. So replace regulator-suspend-voltage with regulator-suspend-microvolt to avoid the inconsitency. Fixes: 85b1304b9daa ("ARM: dts: at91: sama7g5ek: set regulator voltages for standby state") Signed-off-by: Andrei Simion Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20240404123824.19182-2-andrei.simion@microchip.com [claudiu.beznea: added a dot before starting the last sentence in commit description] Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sama7g5ek.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts index 4af8a1c96ed6..bede6e88ae11 100644 --- a/arch/arm/boot/dts/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/at91-sama7g5ek.dts @@ -293,7 +293,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1150000>; + regulator-suspend-microvolt = <1150000>; regulator-mode = <4>; }; @@ -314,7 +314,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1050000>; + regulator-suspend-microvolt = <1050000>; regulator-mode = <4>; }; @@ -331,7 +331,7 @@ regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; @@ -346,7 +346,7 @@ regulator-max-microvolt = <3700000>; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; From 599c9ad5e1d43f5c12d869f5fd406ba5d8c55270 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 20 Apr 2024 07:01:16 +0000 Subject: [PATCH 460/553] icmp: prevent possible NULL dereferences from icmp_build_probe() [ Upstream commit c58e88d49097bd12dfcfef4f075b43f5d5830941 ] First problem is a double call to __in_dev_get_rcu(), because the second one could return NULL. if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) Second problem is a read from dev->ip6_ptr with no NULL check: if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) Use the correct RCU API to fix these. v2: add missing include Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Signed-off-by: Eric Dumazet Cc: Andreas Roeseler Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2b09ef70752f..31051b327e53 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -1029,6 +1030,8 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; struct net *net = dev_net(skb->dev); + struct inet6_dev *in6_dev; + struct in_device *in_dev; struct net_device *dev; char buff[IFNAMSIZ]; u16 ident_len; @@ -1112,10 +1115,15 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) /* Fill bits in reply message */ if (dev->flags & IFF_UP) status |= ICMP_EXT_ECHOREPLY_ACTIVE; - if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + + in_dev = __in_dev_get_rcu(dev); + if (in_dev && rcu_access_pointer(in_dev->ifa_list)) status |= ICMP_EXT_ECHOREPLY_IPV4; - if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + + in6_dev = __in6_dev_get(dev); + if (in6_dev && !list_empty(&in6_dev->addr_list)) status |= ICMP_EXT_ECHOREPLY_IPV6; + dev_put(dev); icmphdr->un.echo.sequence |= htons(status); return true; From 16be600293ca4a494fa7b8f70affc18c1113f405 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2024 16:02:00 +0800 Subject: [PATCH 461/553] bridge/br_netlink.c: no need to return void function [ Upstream commit 4fd1edcdf13c0d234543ecf502092be65c5177db ] br_info_notify is a void function. There is no need to return. Fixes: b6d0425b816e ("bridge: cfm: Netlink Notifications.") Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d087fd4c784a..d38eff27767d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -650,7 +650,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br, { u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; - return br_info_notify(event, br, port, filter); + br_info_notify(event, br, port, filter); } /* From b20beb0598ed6abfcabb0086f0961fc52dd0d8a9 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:47 -0700 Subject: [PATCH 462/553] bnxt_en: refactor reset close code [ Upstream commit 7474b1c82be3780692d537d331f9aa7fc1e5a368 ] Introduce bnxt_fw_fatal_close() API which can be used to stop data path and disable device when firmware is in fatal state. Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller Stable-dep-of: a1acdc226bae ("bnxt_en: Fix the PCI-AER routines") Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0d0aad7141c1..e889017e3a7f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11812,6 +11812,16 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) bnxt_rtnl_unlock_sp(bp); } +static void bnxt_fw_fatal_close(struct bnxt *bp) +{ + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(bp->pdev); +} + static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); @@ -11825,12 +11835,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val); if (val == 0xffff) bp->fw_reset_min_dsecs = 0; - bnxt_tx_disable(bp); - bnxt_disable_napi(bp); - bnxt_disable_int_sync(bp); - bnxt_free_irq(bp); - bnxt_clear_int_mode(bp); - pci_disable_device(bp->pdev); + bnxt_fw_fatal_close(bp); } __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); From 25a82005d5686297e1bbc225c63674c3abe9c1cd Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:48 -0700 Subject: [PATCH 463/553] bnxt_en: Fix the PCI-AER routines [ Upstream commit a1acdc226baec331512f815d6ac9dd6f8435cc7f ] We do not support two simultaneous recoveries so check for reset flag, BNXT_STATE_IN_FW_RESET, and do not proceed with AER further. When the pci channel state is pci_channel_io_frozen, the PCIe link can not be trusted so we disable the traffic immediately and stop BAR access by calling bnxt_fw_fatal_close(). BAR access after AER fatal error can cause an NMI. Fixes: f75d9a0aa967 ("bnxt_en: Re-write PCI BARs after PCI fatal error.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e889017e3a7f..70021b5eb54a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13983,6 +13983,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); + bool abort = false; netdev_info(netdev, "PCI I/O error detected\n"); @@ -13991,16 +13992,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, bnxt_ulp_stop(bp); - if (state == pci_channel_io_perm_failure) { + if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + netdev_err(bp->dev, "Firmware reset already in progress\n"); + abort = true; + } + + if (abort || state == pci_channel_io_perm_failure) { rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; } - if (state == pci_channel_io_frozen) + /* Link is not reliable anymore if state is pci_channel_io_frozen + * so we disable bus master to prevent any potential bad DMAs before + * freeing kernel memory. + */ + if (state == pci_channel_io_frozen) { set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state); + bnxt_fw_fatal_close(bp); + } if (netif_running(netdev)) - bnxt_close(netdev); + __bnxt_close_nic(bp, true, true); if (pci_is_enabled(pdev)) pci_disable_device(pdev); @@ -14086,6 +14098,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) } reset_exit: + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_clear_reservations(bp, true); rtnl_unlock(); From 424c69dbb2acd7cc09285ba4bd46cbdb5d97bd75 Mon Sep 17 00:00:00 2001 From: Paul Geurts Date: Thu, 18 Apr 2024 21:25:38 +0200 Subject: [PATCH 464/553] NFC: trf7970a: disable all regulators on removal [ Upstream commit 6bea4f03c6a4e973ef369e15aac88f37981db49e ] During module probe, regulator 'vin' and 'vdd-io' are used and enabled, but the vdd-io regulator overwrites the 'vin' regulator pointer. During remove, only the vdd-io is disabled, as the vin regulator pointer is not available anymore. When regulator_put() is called during resource cleanup a kernel warning is given, as the regulator is still enabled. Store the two regulators in separate pointers and disable both the regulators on module remove. Fixes: 49d22c70aaf0 ("NFC: trf7970a: Add device tree option of 1.8 Volt IO voltage") Signed-off-by: Paul Geurts Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/DB7PR09MB26847A4EBF88D9EDFEB1DA0F950E2@DB7PR09MB2684.eurprd09.prod.outlook.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/nfc/trf7970a.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 21d68664fe08..7968baa626d1 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -424,7 +424,8 @@ struct trf7970a { enum trf7970a_state state; struct device *dev; struct spi_device *spi; - struct regulator *regulator; + struct regulator *vin_regulator; + struct regulator *vddio_regulator; struct nfc_digital_dev *ddev; u32 quirks; bool is_initiator; @@ -1883,7 +1884,7 @@ static int trf7970a_power_up(struct trf7970a *trf) if (trf->state != TRF7970A_ST_PWR_OFF) return 0; - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret); return ret; @@ -1926,7 +1927,7 @@ static int trf7970a_power_down(struct trf7970a *trf) if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) gpiod_set_value_cansleep(trf->en2_gpiod, 0); - ret = regulator_disable(trf->regulator); + ret = regulator_disable(trf->vin_regulator); if (ret) dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__, ret); @@ -2065,37 +2066,37 @@ static int trf7970a_probe(struct spi_device *spi) mutex_init(&trf->lock); INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler); - trf->regulator = devm_regulator_get(&spi->dev, "vin"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vin_regulator = devm_regulator_get(&spi->dev, "vin"); + if (IS_ERR(trf->vin_regulator)) { + ret = PTR_ERR(trf->vin_regulator); dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret); goto err_destroy_lock; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "Can't enable VIN: %d\n", ret); goto err_destroy_lock; } - uvolts = regulator_get_voltage(trf->regulator); + uvolts = regulator_get_voltage(trf->vin_regulator); if (uvolts > 4000000) trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3; - trf->regulator = devm_regulator_get(&spi->dev, "vdd-io"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io"); + if (IS_ERR(trf->vddio_regulator)) { + ret = PTR_ERR(trf->vddio_regulator); dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vddio_regulator); if (ret) { dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - if (regulator_get_voltage(trf->regulator) == 1800000) { + if (regulator_get_voltage(trf->vddio_regulator) == 1800000) { trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW; dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n"); } @@ -2108,7 +2109,7 @@ static int trf7970a_probe(struct spi_device *spi) if (!trf->ddev) { dev_err(trf->dev, "Can't allocate NFC digital device\n"); ret = -ENOMEM; - goto err_disable_regulator; + goto err_disable_vddio_regulator; } nfc_digital_set_parent_dev(trf->ddev, trf->dev); @@ -2137,8 +2138,10 @@ err_shutdown: trf7970a_shutdown(trf); err_free_ddev: nfc_digital_free_device(trf->ddev); -err_disable_regulator: - regulator_disable(trf->regulator); +err_disable_vddio_regulator: + regulator_disable(trf->vddio_regulator); +err_disable_vin_regulator: + regulator_disable(trf->vin_regulator); err_destroy_lock: mutex_destroy(&trf->lock); return ret; @@ -2157,7 +2160,8 @@ static void trf7970a_remove(struct spi_device *spi) nfc_digital_unregister_device(trf->ddev); nfc_digital_free_device(trf->ddev); - regulator_disable(trf->regulator); + regulator_disable(trf->vddio_regulator); + regulator_disable(trf->vin_regulator); mutex_destroy(&trf->lock); } From 0d14f104027e30720582448706c7d6b43065c851 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 19 Apr 2024 10:04:56 +0800 Subject: [PATCH 465/553] ax25: Fix netdev refcount issue [ Upstream commit 467324bcfe1a31ec65d0cf4aa59421d6b7a7d52b ] The dev_tracker is added to ax25_cb in ax25_bind(). When the ax25 device is detaching, the dev_tracker of ax25_cb should be deallocated in ax25_kill_by_device() instead of the dev_tracker of ax25_dev. The log reported by ref_tracker is shown below: [ 80.884935] ref_tracker: reference already released. [ 80.885150] ref_tracker: allocated in: [ 80.885349] ax25_dev_device_up+0x105/0x540 [ 80.885730] ax25_device_event+0xa4/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] __dev_notify_flags+0x138/0x280 [ 80.885730] dev_change_flags+0xd7/0x180 [ 80.885730] dev_ifsioc+0x6a9/0xa30 [ 80.885730] dev_ioctl+0x4d8/0xd90 [ 80.885730] sock_do_ioctl+0x1c2/0x2d0 [ 80.885730] sock_ioctl+0x38b/0x4f0 [ 80.885730] __se_sys_ioctl+0xad/0xf0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.885730] ref_tracker: freed in: [ 80.885730] ax25_device_event+0x272/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] dev_close_many+0x272/0x370 [ 80.885730] unregister_netdevice_many_notify+0x3b5/0x1180 [ 80.885730] unregister_netdev+0xcf/0x120 [ 80.885730] sixpack_close+0x11f/0x1b0 [ 80.885730] tty_ldisc_kill+0xcb/0x190 [ 80.885730] tty_ldisc_hangup+0x338/0x3d0 [ 80.885730] __tty_hangup+0x504/0x740 [ 80.885730] tty_release+0x46e/0xd80 [ 80.885730] __fput+0x37f/0x770 [ 80.885730] __x64_sys_close+0x7b/0xb0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.893739] ------------[ cut here ]------------ [ 80.894030] WARNING: CPU: 2 PID: 140 at lib/ref_tracker.c:255 ref_tracker_free+0x47b/0x6b0 [ 80.894297] Modules linked in: [ 80.894929] CPU: 2 PID: 140 Comm: ax25_conn_rel_6 Not tainted 6.9.0-rc4-g8cd26fd90c1a #11 [ 80.895190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qem4 [ 80.895514] RIP: 0010:ref_tracker_free+0x47b/0x6b0 [ 80.895808] Code: 83 c5 18 4c 89 eb 48 c1 eb 03 8a 04 13 84 c0 0f 85 df 01 00 00 41 83 7d 00 00 75 4b 4c 89 ff 9 [ 80.896171] RSP: 0018:ffff888009edf8c0 EFLAGS: 00000286 [ 80.896339] RAX: 1ffff1100141ac00 RBX: 1ffff1100149463b RCX: dffffc0000000000 [ 80.896502] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff88800a0d6518 [ 80.896925] RBP: ffff888009edf9b0 R08: ffff88806d3288d3 R09: 1ffff1100da6511a [ 80.897212] R10: dffffc0000000000 R11: ffffed100da6511b R12: ffff88800a4a31d4 [ 80.897859] R13: ffff88800a4a31d8 R14: dffffc0000000000 R15: ffff88800a0d6518 [ 80.898279] FS: 00007fd88b7fe700(0000) GS:ffff88806d300000(0000) knlGS:0000000000000000 [ 80.899436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 80.900181] CR2: 00007fd88c001d48 CR3: 000000000993e000 CR4: 00000000000006f0 ... [ 80.935774] ref_tracker: sp%d@000000000bb9df3d has 1/1 users at [ 80.935774] ax25_bind+0x424/0x4e0 [ 80.935774] __sys_bind+0x1d9/0x270 [ 80.935774] __x64_sys_bind+0x75/0x80 [ 80.935774] do_syscall_64+0xc4/0x1b0 [ 80.935774] entry_SYSCALL_64_after_hwframe+0x67/0x6f Change ax25_dev->dev_tracker to the dev_tracker of ax25_cb in order to mitigate the bug. Fixes: feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20240419020456.29826-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6b4c25a92377..0bffac238b61 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -103,7 +103,7 @@ again: s->ax25_dev = NULL; if (sk->sk_socket) { netdev_put(ax25_dev->dev, - &ax25_dev->dev_tracker); + &s->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); From 82810873acb43f0a41802f11e200363e40892c9f Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 26 Feb 2024 02:24:52 +0000 Subject: [PATCH 466/553] net: make SK_MEMORY_PCPU_RESERV tunable [ Upstream commit 12a686c2e761f1f1f6e6e2117a9ab9c6de2ac8a7 ] This patch adds /proc/sys/net/core/mem_pcpu_rsv sysctl file, to make SK_MEMORY_PCPU_RESERV tunable. Commit 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") introduced per-cpu forward alloc cache: "Implement a per-cpu cache of +1/-1 MB, to reduce number of changes to sk->sk_prot->memory_allocated, which would otherwise be cause of false sharing." sk_prot->memory_allocated points to global atomic variable: atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; If increasing the per-cpu cache size from 1MB to e.g. 16MB, changes to sk->sk_prot->memory_allocated can be further reduced. Performance may be improved on system with many cores. Signed-off-by: Adam Li Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: David S. Miller Stable-dep-of: 3584718cf2ec ("net: fix sk_memory_allocated_{add|sub} vs softirqs") Signed-off-by: Sasha Levin --- Documentation/admin-guide/sysctl/net.rst | 5 +++++ include/net/sock.h | 5 +++-- net/core/sock.c | 1 + net/core/sysctl_net_core.c | 9 +++++++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 6394f5dc2303..e3894c928118 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -205,6 +205,11 @@ Will increase power usage. Default: 0 (off) +mem_pcpu_rsv +------------ + +Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU. + rmem_default ------------ diff --git a/include/net/sock.h b/include/net/sock.h index 60577751ea9e..6ef6ce43a2ed 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1483,6 +1483,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) +extern int sysctl_mem_pcpu_rsv; static inline void sk_memory_allocated_add(struct sock *sk, int amt) @@ -1491,7 +1492,7 @@ sk_memory_allocated_add(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -1505,7 +1506,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } diff --git a/net/core/sock.c b/net/core/sock.c index c8803b95ea0d..550af616f535 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -279,6 +279,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; +int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE; /* Maximal space eaten by iovec or ancillary data plus some space */ int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5b1ce656baa1..d281d5343ff4 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -29,6 +29,7 @@ static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -348,6 +349,14 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &min_rcvbuf, }, + { + .procname = "mem_pcpu_rsv", + .data = &sysctl_mem_pcpu_rsv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_mem_pcpu_rsv, + }, { .procname = "dev_weight", .data = &weight_p, From 1e9b694597d44d02ed464a1aa69f5b094c1041d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 17:52:48 +0000 Subject: [PATCH 467/553] net: fix sk_memory_allocated_{add|sub} vs softirqs [ Upstream commit 3584718cf2ec7e79b6814f2596dcf398c5fb2eca ] Jonathan Heathcote reported a regression caused by blamed commit on aarch64 architecture. x86 happens to have irq-safe __this_cpu_add_return() and __this_cpu_sub(), but this is not generic. I think my confusion came from "struct sock" argument, because these helpers are called with a locked socket. But the memory accounting is per-proto (and per-cpu after the blamed commit). We might cleanup these helpers later to directly accept a "struct proto *proto" argument. Switch to this_cpu_add_return() and this_cpu_xchg() operations, and get rid of preempt_disable()/preempt_enable() pairs. Fast path becomes a bit faster as a result :) Many thanks to Jonathan Heathcote for his awesome report and investigations. Fixes: 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") Reported-by: Jonathan Heathcote Closes: https://lore.kernel.org/netdev/VI1PR01MB42407D7947B2EA448F1E04EFD10D2@VI1PR01MB4240.eurprd01.prod.exchangelabs.com/ Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240421175248.1692552-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/sock.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 6ef6ce43a2ed..77298c74822a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1485,32 +1485,34 @@ sk_memory_allocated(const struct sock *sk) #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) extern int sysctl_mem_pcpu_rsv; -static inline void -sk_memory_allocated_add(struct sock *sk, int amt) +static inline void proto_memory_pcpu_drain(struct proto *proto) { - int local_reserve; + int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); - preempt_disable(); - local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + if (val) + atomic_long_add(val, proto->memory_allocated); } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) +sk_memory_allocated_add(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); +} + +static inline void +sk_memory_allocated_sub(const struct sock *sk, int val) +{ + struct proto *proto = sk->sk_prot; + + val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 From 7a25bfd12733a8f38f8ca47c581f876c3d481ac0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 18:43:26 +0000 Subject: [PATCH 468/553] ipv4: check for NULL idev in ip_route_use_hint() [ Upstream commit 58a4c9b1e5a3e53c9148e80b90e1e43897ce77d1 ] syzbot was able to trigger a NULL deref in fib_validate_source() in an old tree [1]. It appears the bug exists in latest trees. All calls to __in_dev_get_rcu() must be checked for a NULL result. [1] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 2 PID: 3257 Comm: syz-executor.3 Not tainted 5.10.0-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:fib_validate_source+0xbf/0x15a0 net/ipv4/fib_frontend.c:425 Code: 18 f2 f2 f2 f2 42 c7 44 20 23 f3 f3 f3 f3 48 89 44 24 78 42 c6 44 20 27 f3 e8 5d 88 48 fc 4c 89 e8 48 c1 e8 03 48 89 44 24 18 <42> 80 3c 20 00 74 08 4c 89 ef e8 d2 15 98 fc 48 89 5c 24 10 41 bf RSP: 0018:ffffc900015fee40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88800f7a4000 RCX: ffff88800f4f90c0 RDX: 0000000000000000 RSI: 0000000004001eac RDI: ffff8880160c64c0 RBP: ffffc900015ff060 R08: 0000000000000000 R09: ffff88800f7a4000 R10: 0000000000000002 R11: ffff88800f4f90c0 R12: dffffc0000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88800f7a4000 FS: 00007f938acfe6c0(0000) GS:ffff888058c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f938acddd58 CR3: 000000001248e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip_route_use_hint+0x410/0x9b0 net/ipv4/route.c:2231 ip_rcv_finish_core+0x2c4/0x1a30 net/ipv4/ip_input.c:327 ip_list_rcv_finish net/ipv4/ip_input.c:612 [inline] ip_sublist_rcv+0x3ed/0xe50 net/ipv4/ip_input.c:638 ip_list_rcv+0x422/0x470 net/ipv4/ip_input.c:673 __netif_receive_skb_list_ptype net/core/dev.c:5572 [inline] __netif_receive_skb_list_core+0x6b1/0x890 net/core/dev.c:5620 __netif_receive_skb_list net/core/dev.c:5672 [inline] netif_receive_skb_list_internal+0x9f9/0xdc0 net/core/dev.c:5764 netif_receive_skb_list+0x55/0x3e0 net/core/dev.c:5816 xdp_recv_frames net/bpf/test_run.c:257 [inline] xdp_test_run_batch net/bpf/test_run.c:335 [inline] bpf_test_run_xdp_live+0x1818/0x1d00 net/bpf/test_run.c:363 bpf_prog_test_run_xdp+0x81f/0x1170 net/bpf/test_run.c:1376 bpf_prog_test_run+0x349/0x3c0 kernel/bpf/syscall.c:3736 __sys_bpf+0x45c/0x710 kernel/bpf/syscall.c:5115 __do_sys_bpf kernel/bpf/syscall.c:5201 [inline] __se_sys_bpf kernel/bpf/syscall.c:5199 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5199 Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240421184326.1704930-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a0c687ff2598..6c0f1e347b85 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2168,6 +2168,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, int err = -EINVAL; u32 tag = 0; + if (!in_dev) + return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; From 5e5e1865b73ba1628cc0ac92b9d614a489982230 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 19:38:28 +0000 Subject: [PATCH 469/553] net: usb: ax88179_178a: stop lying about skb->truesize [ Upstream commit 4ce62d5b2f7aecd4900e7d6115588ad7f9acccca ] Some usb drivers try to set small skb->truesize and break core networking stacks. In this patch, I removed one of the skb->truesize overide. I also replaced one skb_clone() by an allocation of a fresh and small skb, to get minimally sized skbs, like we did in commit 1e2c61172342 ("net: cdc_ncm: reduce skb truesize in rx path") Fixes: f8ebb3ac881b ("net: usb: ax88179_178a: Fix packet receiving") Reported-by: shironeko Closes: https://lore.kernel.org/netdev/c110f41a0d2776b525930f213ca9715c@tesaguri.club/ Signed-off-by: Eric Dumazet Cc: Jose Alonso Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240421193828.1966195-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 3078511f7608..21b6c4d94a63 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1456,21 +1456,16 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Skip IP alignment pseudo header */ skb_pull(skb, 2); - skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); ax88179_rx_checksum(skb, pkt_hdr); return 1; } - ax_skb = skb_clone(skb, GFP_ATOMIC); + ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len); if (!ax_skb) return 0; - skb_trim(ax_skb, pkt_len); + skb_put(ax_skb, pkt_len); + memcpy(ax_skb->data, skb->data + 2, pkt_len); - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - - skb->truesize = pkt_len_plus_padd + - SKB_DATA_ALIGN(sizeof(struct sk_buff)); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); From 25a1c2d4b1fcf938356a9688a96a6456abd44b29 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:39:30 -0400 Subject: [PATCH 470/553] net: gtp: Fix Use-After-Free in gtp_dellink [ Upstream commit f2a904107ee2b647bb7794a1a82b67740d7c8a64 ] Since call_rcu, which is called in the hlist_for_each_entry_rcu traversal of gtp_dellink, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 94dc550a5062 ("gtp: fix an use-after-free in ipv4_pdp_find()") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/gtp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7086acfed5b9..05b5914d8358 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1110,11 +1110,12 @@ out_hashtable: static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct hlist_node *next; struct pdp_ctx *pctx; int i; for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) + hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid) pdp_context_delete(pctx); list_del_rcu(>p->list); From 14051cbcf386b1358d687021d855b82a0ffc485f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 16 Apr 2024 15:34:45 -0400 Subject: [PATCH 471/553] Bluetooth: MGMT: Fix failing to MGMT_OP_ADD_UUID/MGMT_OP_REMOVE_UUID [ Upstream commit 6eb5fcc416f127f220b9177a5c9ae751cac1cda8 ] These commands don't require the adapter to be up and running so don't use hci_cmd_sync_queue which would check that flag, instead use hci_cmd_sync_submit which would ensure mgmt_class_complete is set properly regardless if any command was actually run or not. Link: https://github.com/bluez/bluez/issues/809 Fixes: d883a4669a1d ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 716f6dc4934b..4f4b394370bf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2680,7 +2680,11 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete); + /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) { mgmt_pending_free(cmd); goto failed; @@ -2774,8 +2778,11 @@ update_class: goto unlock; } - err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); @@ -2841,8 +2848,11 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - err = hci_cmd_sync_queue(hdev, set_class_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, set_class_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); From 31f18a1f58117b7da9bdbdfe22e1da424a67b45c Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Wed, 24 Apr 2024 21:59:03 +0800 Subject: [PATCH 472/553] Bluetooth: hci_sync: Using hci_cmd_sync_submit when removing Adv Monitor [ Upstream commit 88cd6e6b2d327faa13e4505b07f1e380e51b21ff ] Since the d883a4669a1de be introduced in v6.4, bluetooth daemon got the following failed message of MGMT_OP_REMOVE_ADV_MONITOR command when controller is power-off: bluetoothd[20976]: src/adapter.c:reset_adv_monitors_complete() Failed to reset Adv Monitors: Failed> Normally this situation is happened when the bluetoothd deamon be started manually after system booting. Which means that bluetoothd received MGMT_EV_INDEX_ADDED event after kernel runs hci_power_off(). Base on doc/mgmt-api.txt, the MGMT_OP_REMOVE_ADV_MONITOR command can be used when the controller is not powered. This patch changes the code in remove_adv_monitor() to use hci_cmd_sync_submit() instead of hci_cmd_sync_queue(). Fixes: d883a4669a1de ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Cc: Luiz Augusto von Dentz Cc: Manish Mandlik Cc: Archie Pusaka Cc: Miao-chen Chou Signed-off-by: Chun-Yi Lee Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4f4b394370bf..76dac5a90aef 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5540,8 +5540,8 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } - err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd, - mgmt_remove_adv_monitor_complete); + err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd, + mgmt_remove_adv_monitor_complete); if (err) { mgmt_pending_remove(cmd); From 4115403dc9aa984a036b5f639b9059d6191b25cc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 24 Apr 2024 14:29:32 +0200 Subject: [PATCH 473/553] Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional() [ Upstream commit 3d05fc82237aa97162d0d7dc300b55bb34e91d02 ] Any return value from gpiod_get_optional() other than a pointer to a GPIO descriptor or a NULL-pointer is an error and the driver should abort probing. That being said: commit 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") no longer sets power_ctrl_enabled on NULL-pointer returned by devm_gpiod_get_optional(). Restore this behavior but bail-out on errors. While at it: also bail-out on error returned when trying to get the "swctrl" GPIO. Reported-by: Wren Turkal Reported-by: Zijun Hu Closes: https://lore.kernel.org/linux-bluetooth/1713449192-25926-2-git-send-email-quic_zijuhu@quicinc.com/ Fixes: 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski Tested-by: Wren Turkal Reported-by: Wren Turkal Reported-by: Zijun Hu Reviewed-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 33956ddec933..ca6065297a7b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2257,16 +2257,21 @@ static int qca_serdev_probe(struct serdev_device *serdev) (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); - power_ctrl_enabled = false; + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); if (IS_ERR(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855 || - data->soc_type == QCA_WCN7850)) - dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + data->soc_type == QCA_WCN7850)) { + dev_err(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + return PTR_ERR(qcadev->sw_ctrl); + } qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { @@ -2285,10 +2290,13 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(qcadev->bt_en)) { - dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); - power_ctrl_enabled = false; + dev_err(&serdev->dev, "failed to acquire enable gpio\n"); + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { dev_warn(&serdev->dev, "failed to acquire clk\n"); From f4861f052f2d4daa76379abae66fbe2c14f48ec4 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Sun, 21 Apr 2024 16:22:32 +0200 Subject: [PATCH 474/553] ipvs: Fix checksumming on GSO of SCTP packets [ Upstream commit e10d3ba4d434ed172914617ed8d74bd411421193 ] It was observed in the wild that pairs of consecutive packets would leave the IPVS with the same wrong checksum, and the issue only went away when disabling GSO. IPVS needs to avoid computing the SCTP checksum when using GSO. Fixes: 90017accff61 ("sctp: Add GSO support") Co-developed-by: Firo Yang Signed-off-by: Ismael Luceno Tested-by: Andreas Taschner Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a0921adc31a9..1e689c714127 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; } From bca6fa2d9a9f560e6b89fd5190b05cc2f5d422c1 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:37:17 -0400 Subject: [PATCH 475/553] net: openvswitch: Fix Use-After-Free in ovs_ct_exit [ Upstream commit 5ea7b72d4fac2fdbc0425cd8f2ea33abe95235b2 ] Since kfree_rcu, which is called in the hlist_for_each_entry_rcu traversal of ovs_ct_limit_exit, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Reviewed-by: Aaron Conole Link: https://lore.kernel.org/r/ZiYvzQN/Ry5oeFQW@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e4ba86b84b9b..2302bae1e012 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1920,9 +1920,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; + struct hlist_node *next; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node, - lockdep_ovsl_is_held()) + hlist_for_each_entry_safe(ct_limit, next, head, hlist_node) kfree_rcu(ct_limit, rcu); } kfree(info->limits); From 19ebdce6609e8cc462ff1f8478c09b39bf25c159 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:55 +0200 Subject: [PATCH 476/553] mlxsw: spectrum_acl_tcam: Fix race during rehash delayed work [ Upstream commit d90cfe20562407d9f080d24123078d666d730707 ] The purpose of the rehash delayed work is to reduce the number of masks (eRPs) used by an ACL region as the eRP bank is a global and limited resource. This is done in three steps: 1. Creating a new set of masks and a new ACL region which will use the new masks and to which the existing filters will be migrated to. The new region is assigned to 'vregion->region' and the region from which the filters are migrated from is assigned to 'vregion->region2'. 2. Migrating all the filters from the old region to the new region. 3. Destroying the old region and setting 'vregion->region2' to NULL. Only the second steps is performed under the 'vregion->lock' mutex although its comments says that among other things it "Protects consistency of region, region2 pointers". This is problematic as the first step can race with filter insertion from user space that uses 'vregion->region', but under the mutex. Fix by holding the mutex across the entirety of the delayed work and not only during the second step. Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1ec1d54edf2bad0a369e6b4fa030aba64e1f124b.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 41eac7dfb67e..508c0b1b80fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -780,7 +780,9 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) rehash.dw.work); int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + mutex_lock(&vregion->lock); mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + mutex_unlock(&vregion->lock); if (credits < 0) /* Rehash gone out of credits so it was interrupted. * Schedule the work as soon as possible to continue. @@ -1420,7 +1422,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, int err, err2; trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); - mutex_lock(&vregion->lock); err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { @@ -1440,7 +1441,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, /* Let the rollback to be continued later on. */ } } - mutex_unlock(&vregion->lock); trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); return err; } From b996e8699da810e4c915841d6aaef761007f933a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:56 +0200 Subject: [PATCH 477/553] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during activity update [ Upstream commit 79b5b4b18bc85b19d3a518483f9abbbe6d7b3ba4 ] The rule activity update delayed work periodically traverses the list of configured rules and queries their activity from the device. As part of this task it accesses the entry pointed by 'ventry->entry', but this entry can be changed concurrently by the rehash delayed work, leading to a use-after-free [1]. Fix by closing the race and perform the activity query under the 'vregion->lock' mutex. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 Read of size 8 at addr ffff8881054ed808 by task kworker/0:18/181 CPU: 0 PID: 181 Comm: kworker/0:18 Not tainted 6.9.0-rc2-custom-00781-gd5ab772d32f7 #2 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_rule_activity_update_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 mlxsw_sp_acl_rule_activity_update_work+0x219/0x400 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_entry_create+0x7b/0x1f0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x30d/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3d7/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1fcce0a60b231ebeb2515d91022284ba7b4ffe7a.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 508c0b1b80fd..8cbce127d231 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1256,8 +1256,14 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, bool *activity) { - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, - ventry->entry, activity); + struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion; + int err; + + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry, + activity); + mutex_unlock(&vregion->lock); + return err; } static int From 813e2ab753a8f8c243a39ede20c2e0adc15f3887 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:57 +0200 Subject: [PATCH 478/553] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash [ Upstream commit 54225988889931467a9b55fdbef534079b665519 ] The rehash delayed work migrates filters from one region to another according to the number of available credits. The migrated from region is destroyed at the end of the work if the number of credits is non-negative as the assumption is that this is indicative of migration being complete. This assumption is incorrect as a non-negative number of credits can also be the result of a failed migration. The destruction of a region that still has filters referencing it can result in a use-after-free [1]. Fix by not destroying the region if migration failed. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 Read of size 8 at addr ffff8881735319e8 by task kworker/0:31/3858 CPU: 0 PID: 3858 Comm: kworker/0:31 Tainted: G W 6.9.0-rc2-custom-00782-gf2275c2157d8 #5 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 mlxsw_sp_acl_ctcam_entry_del+0x2e/0x70 mlxsw_sp_acl_atcam_entry_del+0x81/0x210 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3cd/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 174: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_region_create+0xdf/0x9c0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x954/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 7: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_region_destroy+0x272/0x310 mlxsw_sp_acl_tcam_vregion_rehash_work+0x731/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3e412b5659ec2310c5c615760dfe5eac18dd7ebd.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 8cbce127d231..44c750e1025a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1548,6 +1548,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, ctx, credits); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + return; } if (*credits >= 0) From 78884187c09fbd41b6c0a4d1a0da6571df381ddb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:58 +0200 Subject: [PATCH 479/553] mlxsw: spectrum_acl_tcam: Rate limit error message [ Upstream commit 5bcf925587e9b5d36420d572a0b4d131c90fb306 ] In the rare cases when the device resources are exhausted it is likely that the rehash delayed work will fail. An error message will be printed whenever this happens which can be overwhelming considering the fact that the work is per-region and that there can be hundreds of regions. Fix by rate limiting the error message. Fixes: e5e7962ee5c2 ("mlxsw: spectrum_acl: Implement region migration according to hints") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/c510763b2ebd25e7990d80183feff91cde593145.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 44c750e1025a..b0396cbf3cce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1547,7 +1547,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, ctx, credits); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); return; } From b822644fd90992ee362c5e0c8d2556efc8856c76 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:59 +0200 Subject: [PATCH 480/553] mlxsw: spectrum_acl_tcam: Fix memory leak during rehash [ Upstream commit 8ca3f7a7b61393804c46f170743c3b839df13977 ] The rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. If the migration fails, the code tries to migrate the filters back to the old region. However, the rollback itself can also fail in which case another migration will be erroneously performed. Besides the fact that this ping pong is not a very good idea, it also creates a problem. Each virtual chunk references two chunks: The currently used one ('vchunk->chunk') and a backup ('vchunk->chunk2'). During migration the first holds the chunk we want to migrate filters to and the second holds the chunk we are migrating filters from. The code currently assumes - but does not verify - that the backup chunk does not exist (NULL) if the currently used chunk does not reference the target region. This assumption breaks when we are trying to rollback a rollback, resulting in the backup chunk being overwritten and leaked [1]. Fix by not rolling back a failed rollback and add a warning to avoid future cases. [1] WARNING: CPU: 5 PID: 1063 at lib/parman.c:291 parman_destroy+0x17/0x20 Modules linked in: CPU: 5 PID: 1063 Comm: kworker/5:11 Tainted: G W 6.9.0-rc2-custom-00784-gc6a05c468a0b #14 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:parman_destroy+0x17/0x20 [...] Call Trace: mlxsw_sp_acl_atcam_region_fini+0x19/0x60 mlxsw_sp_acl_tcam_region_destroy+0x49/0xf0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x1f1/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 843500518509 ("mlxsw: spectrum_acl: Do rollback as another call to mlxsw_sp_acl_tcam_vchunk_migrate_all()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/d5edd4f4503934186ae5cfe268503b16345b4e0f.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index b0396cbf3cce..adaad9fc5fa5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1297,6 +1297,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_chunk *new_chunk; + WARN_ON(vchunk->chunk2); + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); if (IS_ERR(new_chunk)) return PTR_ERR(new_chunk); @@ -1431,6 +1433,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { + if (ctx->this_is_rollback) + return err; /* In case migration was not successful, we need to swap * so the original region pointer is assigned again * to vregion->region. From 751d352858108314efd33dddd5a9a2b6bf7d6916 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:00 +0200 Subject: [PATCH 481/553] mlxsw: spectrum_acl_tcam: Fix warning during rehash [ Upstream commit 743edc8547a92b6192aa1f1b6bb78233fa21dc9b ] As previously explained, the rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. When the work runs out of credits it stores the current chunk and entry as markers in the per-work context so that it would know where to resume the migration from the next time the work is scheduled. Upon error, the chunk marker is reset to NULL, but without resetting the entry markers despite being relative to it. This can result in migration being resumed from an entry that does not belong to the chunk being migrated. In turn, this will eventually lead to a chunk being iterated over as if it is an entry. Because of how the two structures happen to be defined, this does not lead to KASAN splats, but to warnings such as [1]. Fix by creating a helper that resets all the markers and call it from all the places the currently only reset the chunk marker. For good measures also call it when starting a completely new rehash. Add a warning to avoid future cases. [1] WARNING: CPU: 7 PID: 1076 at drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c:407 mlxsw_afk_encode+0x242/0x2f0 Modules linked in: CPU: 7 PID: 1076 Comm: kworker/7:24 Tainted: G W 6.9.0-rc3-custom-00880-g29e61d91b77b #29 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_afk_encode+0x242/0x2f0 [...] Call Trace: mlxsw_sp_acl_atcam_entry_add+0xd9/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x109/0x290 mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cc17eed86b41dd829d39b07906fec074a9ce580e.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index adaad9fc5fa5..1a6c774c8b7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -792,6 +792,17 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); } +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + /* The entry markers are relative to the current chunk and therefore + * needs to be reset together with the chunk marker. + */ + ctx->current_vchunk = NULL; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; +} + static void mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) { @@ -814,7 +825,7 @@ mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *v * the current chunk pointer to make sure all chunks * are properly migrated. */ - vregion->rehash.ctx.current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx); } static struct mlxsw_sp_acl_tcam_vregion * @@ -1317,7 +1328,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); vchunk->chunk2 = NULL; - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); } static int @@ -1349,6 +1360,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, ventry = list_first_entry(&vchunk->ventry_list, typeof(*ventry), list); + WARN_ON(ventry->vchunk != vchunk); + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { /* During rollback, once we reach the ventry that failed * to migrate, we are done. @@ -1440,7 +1453,7 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, * to vregion->region. */ swap(vregion->region, vregion->region2); - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); ctx->this_is_rollback = true; err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); @@ -1499,6 +1512,7 @@ mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, ctx->hints_priv = hints_priv; ctx->this_is_rollback = false; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); return 0; From 4526a56e02da3725db979358964df9cd9c567154 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:01 +0200 Subject: [PATCH 482/553] mlxsw: spectrum_acl_tcam: Fix incorrect list API usage [ Upstream commit b377add0f0117409c418ddd6504bd682ebe0bf79 ] Both the function that migrates all the chunks within a region and the function that migrates all the entries within a chunk call list_first_entry() on the respective lists without checking that the lists are not empty. This is incorrect usage of the API, which leads to the following warning [1]. Fix by returning if the lists are empty as there is nothing to migrate in this case. [1] WARNING: CPU: 0 PID: 6437 at drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c:1266 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0> Modules linked in: CPU: 0 PID: 6437 Comm: kworker/0:37 Not tainted 6.9.0-rc3-custom-00883-g94a65f079ef6 #39 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0x2c0 [...] Call Trace: mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x4a0 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/4628e9a22d1d84818e28310abbbc498e7bc31bc9.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 1a6c774c8b7b..d0c7cb059616 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1351,6 +1351,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, return 0; } + if (list_empty(&vchunk->ventry_list)) + goto out; + /* If the migration got interrupted, we have the ventry to start from * stored in context. */ @@ -1402,6 +1405,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, } } +out: mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); return 0; } @@ -1415,6 +1419,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; + if (list_empty(&vregion->vchunk_list)) + return 0; + /* If the migration got interrupted, we have the vchunk * we are working on stored in context. */ From 5bfe7bf9656ed2633718388f12b7c38b86414a04 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:02 +0200 Subject: [PATCH 483/553] mlxsw: spectrum_acl_tcam: Fix memory leak when canceling rehash work [ Upstream commit fb4e2b70a7194b209fc7320bbf33b375f7114bd5 ] The rehash delayed work is rescheduled with a delay if the number of credits at end of the work is not negative as supposedly it means that the migration ended. Otherwise, it is rescheduled immediately. After "mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash" the above is no longer accurate as a non-negative number of credits is no longer indicative of the migration being done. It can also happen if the work encountered an error in which case the migration will resume the next time the work is scheduled. The significance of the above is that it is possible for the work to be pending and associated with hints that were allocated when the migration started. This leads to the hints being leaked [1] when the work is canceled while pending as part of ACL region dismantle. Fix by freeing the hints if hints are associated with a work that was canceled while pending. Blame the original commit since the reliance on not having a pending work associated with hints is fragile. [1] unreferenced object 0xffff88810e7c3000 (size 256): comm "kworker/0:16", pid 176, jiffies 4295460353 hex dump (first 32 bytes): 00 30 95 11 81 88 ff ff 61 00 00 00 00 00 00 80 .0......a....... 00 00 61 00 40 00 00 00 00 00 00 00 04 00 00 00 ..a.@........... backtrace (crc 2544ddb9): [<00000000cf8cfab3>] kmalloc_trace+0x23f/0x2a0 [<000000004d9a1ad9>] objagg_hints_get+0x42/0x390 [<000000000b143cf3>] mlxsw_sp_acl_erp_rehash_hints_get+0xca/0x400 [<0000000059bdb60a>] mlxsw_sp_acl_tcam_vregion_rehash_work+0x868/0x1160 [<00000000e81fd734>] process_one_work+0x59c/0xf20 [<00000000ceee9e81>] worker_thread+0x799/0x12c0 [<00000000bda6fe39>] kthread+0x246/0x300 [<0000000070056d23>] ret_from_fork+0x34/0x70 [<00000000dea2b93e>] ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0cc12ebb07c4d4c41a1265ee2c28b392ff997a86.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index d0c7cb059616..685bcf8cbfa9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -898,10 +898,14 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + mutex_lock(&tcam->lock); list_del(&vregion->tlist); mutex_unlock(&tcam->lock); - cancel_delayed_work_sync(&vregion->rehash.dw); + if (cancel_delayed_work_sync(&vregion->rehash.dw) && + ctx->hints_priv) + ops->region_rehash_hints_put(ctx->hints_priv); } mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); if (vregion->region2) From e32535744043fe9442bc3ce739492440414f3e76 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 23 Apr 2024 17:21:48 -0700 Subject: [PATCH 484/553] eth: bnxt: fix counting packets discarded due to OOM and netpoll [ Upstream commit 730117730709992c9f6535dd7b47638ee561ec45 ] I added OOM and netpoll discard counters, naively assuming that the cpr pointer is pointing to a common completion ring. Turns out that is usually *a* completion ring but not *the* completion ring which bnapi->cp_ring points to. bnapi->cp_ring is where the stats are read from, so we end up reporting 0 thru ethtool -S and qstat even though the drop events have happened. Make 100% sure we're recording statistics in the correct structure. Fixes: 907fd4a294db ("bnxt: count discards due to memory allocation errors") Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240424002148.3937059-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 44 ++++++++++------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 70021b5eb54a..77ea19bcdc6f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1697,7 +1697,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } else { @@ -1707,7 +1707,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } @@ -1723,7 +1723,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (!skb) { skb_free_frag(data); bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } skb_reserve(skb, bp->rx_offset); @@ -1734,7 +1734,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). */ - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } @@ -1950,11 +1950,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, cp_cons, agg_bufs, false); - if (!frag_len) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!frag_len) + goto oom_next_rx; } xdp_active = true; } @@ -1977,9 +1974,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else bnxt_xdp_buff_frags_free(rxr, &xdp); } - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } else { u32 payload; @@ -1990,29 +1985,21 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, payload = 0; skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr, payload | len); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } if (agg_bufs) { if (!xdp_active) { skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } else { skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } } @@ -2090,6 +2077,11 @@ next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; return rc; + +oom_next_rx: + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; } /* In netpoll mode, if we are using a combined completion ring, we need to @@ -2135,7 +2127,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp, } rc = bnxt_rx_pkt(bp, cpr, raw_cons, event); if (rc && rc != -EBUSY) - cpr->sw_stats.rx.rx_netpoll_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1; return rc; } From 13ba94f6cc820fdea15efeaa17d4c722874eebf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Apr 2024 20:45:01 +0200 Subject: [PATCH 485/553] netfilter: nf_tables: honor table dormant flag from netdev release event path [ Upstream commit 8e30abc9ace4f0add4cd761dfdbfaebae5632dd2 ] Check for table dormant flag otherwise netdev release event path tries to unregister an already unregistered hook. [524854.857999] ------------[ cut here ]------------ [524854.858010] WARNING: CPU: 0 PID: 3386599 at net/netfilter/core.c:501 __nf_unregister_net_hook+0x21a/0x260 [...] [524854.858848] CPU: 0 PID: 3386599 Comm: kworker/u32:2 Not tainted 6.9.0-rc3+ #365 [524854.858869] Workqueue: netns cleanup_net [524854.858886] RIP: 0010:__nf_unregister_net_hook+0x21a/0x260 [524854.858903] Code: 24 e8 aa 73 83 ff 48 63 43 1c 83 f8 01 0f 85 3d ff ff ff e8 98 d1 f0 ff 48 8b 3c 24 e8 8f 73 83 ff 48 63 43 1c e9 26 ff ff ff <0f> 0b 48 83 c4 18 48 c7 c7 00 68 e9 82 5b 5d 41 5c 41 5d 41 5e 41 [524854.858914] RSP: 0018:ffff8881e36d79e0 EFLAGS: 00010246 [524854.858926] RAX: 0000000000000000 RBX: ffff8881339ae790 RCX: ffffffff81ba524a [524854.858936] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff8881c8a16438 [524854.858945] RBP: ffff8881c8a16438 R08: 0000000000000001 R09: ffffed103c6daf34 [524854.858954] R10: ffff8881e36d79a7 R11: 0000000000000000 R12: 0000000000000005 [524854.858962] R13: ffff8881c8a16000 R14: 0000000000000000 R15: ffff8881351b5a00 [524854.858971] FS: 0000000000000000(0000) GS:ffff888390800000(0000) knlGS:0000000000000000 [524854.858982] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [524854.858991] CR2: 00007fc9be0f16f4 CR3: 00000001437cc004 CR4: 00000000001706f0 [524854.859000] Call Trace: [524854.859006] [524854.859013] ? __warn+0x9f/0x1a0 [524854.859027] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859044] ? report_bug+0x1b1/0x1e0 [524854.859060] ? handle_bug+0x3c/0x70 [524854.859071] ? exc_invalid_op+0x17/0x40 [524854.859083] ? asm_exc_invalid_op+0x1a/0x20 [524854.859100] ? __nf_unregister_net_hook+0x6a/0x260 [524854.859116] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859135] nf_tables_netdev_event+0x337/0x390 [nf_tables] [524854.859304] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859461] ? packet_notifier+0xb3/0x360 [524854.859476] ? _raw_spin_unlock_irqrestore+0x11/0x40 [524854.859489] ? dcbnl_netdevice_event+0x35/0x140 [524854.859507] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859661] notifier_call_chain+0x7d/0x140 [524854.859677] unregister_netdevice_many_notify+0x5e1/0xae0 Fixes: d54725cd11a5 ("netfilter: nf_tables: support for multiple devices per netdev hook") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_chain_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 274b6f7e6bb5..d170758a1eb5 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -338,7 +338,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; if (n > 1) { - nf_unregister_net_hook(ctx->net, &found->ops); + if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(ctx->net, &found->ops); + list_del_rcu(&found->list); kfree_rcu(found, rcu); return; From 152ed360cf2d273f88fc99a518b7eb868aae2939 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 23 Apr 2024 11:27:17 -0700 Subject: [PATCH 486/553] i40e: Do not use WQ_MEM_RECLAIM flag for workqueue [ Upstream commit 2cc7d150550cc981aceedf008f5459193282425c ] Issue reported by customer during SRIOV testing, call trace: When both i40e and the i40iw driver are loaded, a warning in check_flush_dependency is being triggered. This seems to be because of the i40e driver workqueue is allocated with the WQ_MEM_RECLAIM flag, and the i40iw one is not. Similar error was encountered on ice too and it was fixed by removing the flag. Do the same for i40e too. [Feb 9 09:08] ------------[ cut here ]------------ [ +0.000004] workqueue: WQ_MEM_RECLAIM i40e:i40e_service_task [i40e] is flushing !WQ_MEM_RECLAIM infiniband:0x0 [ +0.000060] WARNING: CPU: 0 PID: 937 at kernel/workqueue.c:2966 check_flush_dependency+0x10b/0x120 [ +0.000007] Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_timer snd_seq_device snd soundcore nls_utf8 cifs cifs_arc4 nls_ucs2_utils rdma_cm iw_cm ib_cm cifs_md4 dns_resolver netfs qrtr rfkill sunrpc vfat fat intel_rapl_msr intel_rapl_common irdma intel_uncore_frequency intel_uncore_frequency_common ice ipmi_ssif isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp gnss coretemp ib_uverbs rapl intel_cstate ib_core iTCO_wdt iTCO_vendor_support acpi_ipmi mei_me ipmi_si intel_uncore ioatdma i2c_i801 joydev pcspkr mei ipmi_devintf lpc_ich intel_pch_thermal i2c_smbus ipmi_msghandler acpi_power_meter acpi_pad xfs libcrc32c ast sd_mod drm_shmem_helper t10_pi drm_kms_helper sg ixgbe drm i40e ahci crct10dif_pclmul libahci crc32_pclmul igb crc32c_intel libata ghash_clmulni_intel i2c_algo_bit mdio dca wmi dm_mirror dm_region_hash dm_log dm_mod fuse [ +0.000050] CPU: 0 PID: 937 Comm: kworker/0:3 Kdump: loaded Not tainted 6.8.0-rc2-Feb-net_dev-Qiueue-00279-gbd43c5687e05 #1 [ +0.000003] Hardware name: Intel Corporation S2600BPB/S2600BPB, BIOS SE5C620.86B.02.01.0013.121520200651 12/15/2020 [ +0.000001] Workqueue: i40e i40e_service_task [i40e] [ +0.000024] RIP: 0010:check_flush_dependency+0x10b/0x120 [ +0.000003] Code: ff 49 8b 54 24 18 48 8d 8b b0 00 00 00 49 89 e8 48 81 c6 b0 00 00 00 48 c7 c7 b0 97 fa 9f c6 05 8a cc 1f 02 01 e8 35 b3 fd ff <0f> 0b e9 10 ff ff ff 80 3d 78 cc 1f 02 00 75 94 e9 46 ff ff ff 90 [ +0.000002] RSP: 0018:ffffbd294976bcf8 EFLAGS: 00010282 [ +0.000002] RAX: 0000000000000000 RBX: ffff94d4c483c000 RCX: 0000000000000027 [ +0.000001] RDX: ffff94d47f620bc8 RSI: 0000000000000001 RDI: ffff94d47f620bc0 [ +0.000001] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffff7fff [ +0.000001] R10: ffffbd294976bb98 R11: ffffffffa0be65e8 R12: ffff94c5451ea180 [ +0.000001] R13: ffff94c5ab5e8000 R14: ffff94c5c20b6e05 R15: ffff94c5f1330ab0 [ +0.000001] FS: 0000000000000000(0000) GS:ffff94d47f600000(0000) knlGS:0000000000000000 [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007f9e6f1fca70 CR3: 0000000038e20004 CR4: 00000000007706f0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.000001] PKRU: 55555554 [ +0.000001] Call Trace: [ +0.000001] [ +0.000002] ? __warn+0x80/0x130 [ +0.000003] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? report_bug+0x195/0x1a0 [ +0.000005] ? handle_bug+0x3c/0x70 [ +0.000003] ? exc_invalid_op+0x14/0x70 [ +0.000002] ? asm_exc_invalid_op+0x16/0x20 [ +0.000006] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? check_flush_dependency+0x10b/0x120 [ +0.000002] __flush_workqueue+0x126/0x3f0 [ +0.000015] ib_cache_cleanup_one+0x1c/0xe0 [ib_core] [ +0.000056] __ib_unregister_device+0x6a/0xb0 [ib_core] [ +0.000023] ib_unregister_device_and_put+0x34/0x50 [ib_core] [ +0.000020] i40iw_close+0x4b/0x90 [irdma] [ +0.000022] i40e_notify_client_of_netdev_close+0x54/0xc0 [i40e] [ +0.000035] i40e_service_task+0x126/0x190 [i40e] [ +0.000024] process_one_work+0x174/0x340 [ +0.000003] worker_thread+0x27e/0x390 [ +0.000001] ? __pfx_worker_thread+0x10/0x10 [ +0.000002] kthread+0xdf/0x110 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000002] ret_from_fork+0x2d/0x50 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000001] ret_from_fork_asm+0x1b/0x30 [ +0.000004] [ +0.000001] ---[ end trace 0000000000000000 ]--- Fixes: 4d5957cbdecd ("i40e: remove WQ_UNBOUND and the task limit of our workqueue") Signed-off-by: Sindhu Devale Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Mateusz Polchlopek Signed-off-by: Aleksandr Loktionov Tested-by: Robert Ganzynkowicz Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a9db1ed74d3f..d08d41545dae 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16716,7 +16716,7 @@ static int __init i40e_init_module(void) * since we need to be able to guarantee forward progress even under * memory pressure. */ - i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name); + i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; From 81ad28ac21b7d489a44a399351ef139330397c78 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 23 Apr 2024 11:27:18 -0700 Subject: [PATCH 487/553] i40e: Report MFS in decimal base instead of hex [ Upstream commit ef3c313119ea448c22da10366faa26b5b4b1a18e ] If the MFS is set below the default (0x2600), a warning message is reported like the following : MFS for port 1 has been set below the default: 600 This message is a bit confusing as the number shown here (600) is in fact an hexa number: 0x600 = 1536 Without any explicit "0x" prefix, this message is read like the MFS is set to 600 bytes. MFS, as per MTUs, are usually expressed in decimal base. This commit reports both current and default MFS values in decimal so it's less confusing for end-users. A typical warning message looks like the following : MFS for port 1 (1536) has been set below the default (9728) Signed-off-by: Erwan Velu Reviewed-by: Simon Horman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Link: https://lore.kernel.org/r/20240423182723.740401-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d08d41545dae..9efd4b962dce 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16173,8 +16173,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) val = (rd32(&pf->hw, I40E_PRTGL_SAH) & I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - pf->hw.port, val); + dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", + pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out From 777d7d0049ff055eb04b65953caf2095c8806d7b Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Tue, 23 Apr 2024 11:27:19 -0700 Subject: [PATCH 488/553] iavf: Fix TC config comparison with existing adapter TC config [ Upstream commit 54976cf58d6168b8d15cebb395069f23b2f34b31 ] Same number of TCs doesn't imply that underlying TC configs are same. The config could be different due to difference in number of queues in each TC. Add utility function to determine if TC configs are same. Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Sudheer Mogilappagari Tested-by: Mineri Bhange (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b9c4b311cd62..53b9fe35d803 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3631,6 +3631,34 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) spin_unlock_bh(&adapter->cloud_filter_list_lock); } +/** + * iavf_is_tc_config_same - Compare the mqprio TC config with the + * TC config already configured on this adapter. + * @adapter: board private structure + * @mqprio_qopt: TC config received from kernel. + * + * This function compares the TC config received from the kernel + * with the config already configured on the adapter. + * + * Return: True if configuration is same, false otherwise. + **/ +static bool iavf_is_tc_config_same(struct iavf_adapter *adapter, + struct tc_mqprio_qopt *mqprio_qopt) +{ + struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0]; + int i; + + if (adapter->num_tc != mqprio_qopt->num_tc) + return false; + + for (i = 0; i < adapter->num_tc; i++) { + if (ch[i].count != mqprio_qopt->count[i] || + ch[i].offset != mqprio_qopt->offset[i]) + return false; + } + return true; +} + /** * __iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure @@ -3688,7 +3716,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) if (ret) return ret; /* Return if same TC config is requested */ - if (adapter->num_tc == num_tc) + if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt)) return 0; adapter->num_tc = num_tc; From f05caed83394c8354bbceea9c90c15f648eec3cd Mon Sep 17 00:00:00 2001 From: Jason Reeder Date: Wed, 24 Apr 2024 12:46:26 +0530 Subject: [PATCH 489/553] net: ethernet: ti: am65-cpts: Fix PTPv1 message type on TX packets [ Upstream commit 1b9e743e923b256e353a9a644195372285e5a6c0 ] The CPTS, by design, captures the messageType (Sync, Delay_Req, etc.) field from the second nibble of the PTP header which is defined in the PTPv2 (1588-2008) specification. In the PTPv1 (1588-2002) specification the first two bytes of the PTP header are defined as the versionType which is always 0x0001. This means that any PTPv1 packets that are tagged for TX timestamping by the CPTS will have their messageType set to 0x0 which corresponds to a Sync message type. This causes issues when a PTPv1 stack is expecting a Delay_Req (messageType: 0x1) timestamp that never appears. Fix this by checking if the ptp_class of the timestamped TX packet is PTP_CLASS_V1 and then matching the PTP sequence ID to the stored sequence ID in the skb->cb data structure. If the sequence IDs match and the packet is of type PTPv1 then there is a chance that the messageType has been incorrectly stored by the CPTS so overwrite the messageType stored by the CPTS with the messageType from the skb->cb data structure. This allows the PTPv1 stack to receive TX timestamps for Delay_Req packets which are necessary to lock onto a PTP Leader. Signed-off-by: Jason Reeder Signed-off-by: Ravi Gunasekaran Tested-by: Ed Trexel Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver") Link: https://lore.kernel.org/r/20240424071626.32558-1-r-gunasekaran@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/am65-cpts.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 9948ac14e68d..c1bdf045e981 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -649,6 +649,11 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + if ((ptp_classify_raw(skb) & PTP_CLASS_V1) && + ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) == + (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK))) + mtype_seqid = skb_cb->skb_mtype_seqid; + if (mtype_seqid == skb_cb->skb_mtype_seqid) { u64 ns = event->timestamp; From 2ceacda2709eab9c11ec3247031b2572af4c44ef Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Apr 2024 10:04:43 -0700 Subject: [PATCH 490/553] af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc(). [ Upstream commit 1971d13ffa84a551d29a81fdf5b5ec5be166ac83 ] syzbot reported a lockdep splat regarding unix_gc_lock and unix_state_lock(). One is called from recvmsg() for a connected socket, and another is called from GC for TCP_LISTEN socket. So, the splat is false-positive. Let's add a dedicated lock class for the latter to suppress the splat. Note that this change is not necessary for net-next.git as the issue is only applied to the old GC impl. [0]: WARNING: possible circular locking dependency detected 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Not tainted ----------------------------------------------------- kworker/u8:1/11 is trying to acquire lock: ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 but task is already holding lock: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (unix_gc_lock){+.+.}-{2:2}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] unix_notinflight+0x13d/0x390 net/unix/garbage.c:140 unix_detach_fds net/unix/af_unix.c:1819 [inline] unix_destruct_scm+0x221/0x350 net/unix/af_unix.c:1876 skb_release_head_state+0x100/0x250 net/core/skbuff.c:1188 skb_release_all net/core/skbuff.c:1200 [inline] __kfree_skb net/core/skbuff.c:1216 [inline] kfree_skb_reason+0x16d/0x3b0 net/core/skbuff.c:1252 kfree_skb include/linux/skbuff.h:1262 [inline] manage_oob net/unix/af_unix.c:2672 [inline] unix_stream_read_generic+0x1125/0x2700 net/unix/af_unix.c:2749 unix_stream_splice_read+0x239/0x320 net/unix/af_unix.c:2981 do_splice_read fs/splice.c:985 [inline] splice_file_to_pipe+0x299/0x500 fs/splice.c:1295 do_splice+0xf2d/0x1880 fs/splice.c:1379 __do_splice fs/splice.c:1436 [inline] __do_sys_splice fs/splice.c:1652 [inline] __se_sys_splice+0x331/0x4a0 fs/splice.c:1634 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&u->lock){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(unix_gc_lock); lock(&u->lock); lock(unix_gc_lock); lock(&u->lock); *** DEADLOCK *** 3 locks held by kworker/u8:1/11: #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x17c0 kernel/workqueue.c:3335 #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x17c0 kernel/workqueue.c:3335 #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 stack backtrace: CPU: 0 PID: 11 Comm: kworker/u8:1 Not tainted 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Workqueue: events_unbound __unix_gc Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Reported-and-tested-by: syzbot+fa379358c28cc87cc307@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa379358c28cc87cc307 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424170443.9832-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/af_unix.h | 3 +++ net/unix/garbage.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 16d6936baa2f..e7d71a516bd4 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -79,6 +79,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. + */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 85c6f05c0fa3..d2fc795394a5 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -260,7 +260,7 @@ void unix_gc(void) __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); if (sk->sk_state == TCP_LISTEN) { - unix_state_lock(sk); + unix_state_lock_nested(sk, U_LOCK_GC_LISTENER); unix_state_unlock(sk); } } From b3686200adba26dd1f8beee3d9c1b34563db1e65 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Feb 2023 16:09:45 -0800 Subject: [PATCH 491/553] cifs: Replace remaining 1-element arrays commit 35235e19b393b54db0e0d7c424d658ba45f20468 upstream. The kernel is globally removing the ambiguous 0-length and 1-element arrays in favor of flexible arrays, so that we can gain both compile-time and run-time array bounds checking[1]. Replace the trailing 1-element array with a flexible array in the following structures: struct cifs_spnego_msg struct cifs_quota_data struct get_dfs_referral_rsp struct file_alt_name_info NEGOTIATE_RSP SESSION_SETUP_ANDX TCONX_REQ TCONX_RSP TCONX_RSP_EXT ECHO_REQ ECHO_RSP OPEN_REQ OPENX_REQ LOCK_REQ RENAME_REQ COPY_REQ COPY_RSP NT_RENAME_REQ DELETE_FILE_REQ DELETE_DIRECTORY_REQ CREATE_DIRECTORY_REQ QUERY_INFORMATION_REQ SETATTR_REQ TRANSACT_IOCTL_REQ TRANSACT_CHANGE_NOTIFY_REQ TRANSACTION2_QPI_REQ TRANSACTION2_SPI_REQ TRANSACTION2_FFIRST_REQ TRANSACTION2_GET_DFS_REFER_REQ FILE_UNIX_LINK_INFO FILE_DIRECTORY_INFO FILE_FULL_DIRECTORY_INFO SEARCH_ID_FULL_DIR_INFO FILE_BOTH_DIRECTORY_INFO FIND_FILE_STANDARD_INFO Replace the trailing 1-element array with a flexible array, but leave the existing structure padding: FILE_ALL_INFO FILE_UNIX_INFO Remove unused structures: struct gea struct gealist Adjust all related size calculations to match the changes to sizeof(). No machine code output differences are produced after these changes. [1] For lots of details, see both: https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays https://people.kernel.org/kees/bounded-flexible-arrays-in-c Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Kees Cook Signed-off-by: Steve French [ Salvatore Bonaccorso: Patch does not apply cleanly only due to a whitespace difference in fs/smb/client/cifspdu.h . Fixed up manually. ] Signed-off-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_spnego.h | 2 +- fs/smb/client/cifspdu.h | 96 ++++++++++++++++++------------------- fs/smb/client/readdir.c | 6 +-- fs/smb/client/smb2pdu.c | 4 +- fs/smb/client/smb2pdu.h | 2 +- 5 files changed, 53 insertions(+), 57 deletions(-) diff --git a/fs/smb/client/cifs_spnego.h b/fs/smb/client/cifs_spnego.h index 7f102ffeb675..e4d751b0c812 100644 --- a/fs/smb/client/cifs_spnego.h +++ b/fs/smb/client/cifs_spnego.h @@ -24,7 +24,7 @@ struct cifs_spnego_msg { uint32_t flags; uint32_t sesskey_len; uint32_t secblob_len; - uint8_t data[1]; + uint8_t data[]; }; #ifdef __KERNEL__ diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 97bb1838555b..94f86374f4b7 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -562,7 +562,7 @@ typedef union smb_com_session_setup_andx { __u32 Reserved; __le32 Capabilities; /* see below */ __le16 ByteCount; - unsigned char SecurityBlob[1]; /* followed by */ + unsigned char SecurityBlob[]; /* followed by */ /* STRING NativeOS */ /* STRING NativeLanMan */ } __attribute__((packed)) req; /* NTLM request format (with @@ -582,7 +582,7 @@ typedef union smb_com_session_setup_andx { __u32 Reserved; /* see below */ __le32 Capabilities; __le16 ByteCount; - unsigned char CaseInsensitivePassword[1]; /* followed by: */ + unsigned char CaseInsensitivePassword[]; /* followed by: */ /* unsigned char * CaseSensitivePassword; */ /* STRING AccountName */ /* STRING PrimaryDomain */ @@ -599,7 +599,7 @@ typedef union smb_com_session_setup_andx { __le16 Action; /* see below */ __le16 SecurityBlobLength; __u16 ByteCount; - unsigned char SecurityBlob[1]; /* followed by */ + unsigned char SecurityBlob[]; /* followed by */ /* unsigned char * NativeOS; */ /* unsigned char * NativeLanMan; */ /* unsigned char * PrimaryDomain; */ @@ -618,7 +618,7 @@ typedef union smb_com_session_setup_andx { __le16 PasswordLength; __u32 Reserved; /* encrypt key len and offset */ __le16 ByteCount; - unsigned char AccountPassword[1]; /* followed by */ + unsigned char AccountPassword[]; /* followed by */ /* STRING AccountName */ /* STRING PrimaryDomain */ /* STRING NativeOS */ @@ -632,7 +632,7 @@ typedef union smb_com_session_setup_andx { __le16 AndXOffset; __le16 Action; /* see below */ __u16 ByteCount; - unsigned char NativeOS[1]; /* followed by */ + unsigned char NativeOS[]; /* followed by */ /* unsigned char * NativeLanMan; */ /* unsigned char * PrimaryDomain; */ } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */ @@ -693,7 +693,7 @@ typedef struct smb_com_tconx_req { __le16 Flags; /* see below */ __le16 PasswordLength; __le16 ByteCount; - unsigned char Password[1]; /* followed by */ + unsigned char Password[]; /* followed by */ /* STRING Path *//* \\server\share name */ /* STRING Service */ } __attribute__((packed)) TCONX_REQ; @@ -705,7 +705,7 @@ typedef struct smb_com_tconx_rsp { __le16 AndXOffset; __le16 OptionalSupport; /* see below */ __u16 ByteCount; - unsigned char Service[1]; /* always ASCII, not Unicode */ + unsigned char Service[]; /* always ASCII, not Unicode */ /* STRING NativeFileSystem */ } __attribute__((packed)) TCONX_RSP; @@ -718,7 +718,7 @@ typedef struct smb_com_tconx_rsp_ext { __le32 MaximalShareAccessRights; __le32 GuestMaximalShareAccessRights; __u16 ByteCount; - unsigned char Service[1]; /* always ASCII, not Unicode */ + unsigned char Service[]; /* always ASCII, not Unicode */ /* STRING NativeFileSystem */ } __attribute__((packed)) TCONX_RSP_EXT; @@ -755,14 +755,14 @@ typedef struct smb_com_echo_req { struct smb_hdr hdr; __le16 EchoCount; __le16 ByteCount; - char Data[1]; + char Data[]; } __attribute__((packed)) ECHO_REQ; typedef struct smb_com_echo_rsp { struct smb_hdr hdr; __le16 SequenceNumber; __le16 ByteCount; - char Data[1]; + char Data[]; } __attribute__((packed)) ECHO_RSP; typedef struct smb_com_logoff_andx_req { @@ -862,7 +862,7 @@ typedef struct smb_com_open_req { /* also handles create */ __le32 ImpersonationLevel; __u8 SecurityFlags; __le16 ByteCount; - char fileName[1]; + char fileName[]; } __attribute__((packed)) OPEN_REQ; /* open response: oplock levels */ @@ -939,7 +939,7 @@ typedef struct smb_com_openx_req { __le32 Timeout; __le32 Reserved; __le16 ByteCount; /* file name follows */ - char fileName[1]; + char fileName[]; } __attribute__((packed)) OPENX_REQ; typedef struct smb_com_openx_rsp { @@ -1087,7 +1087,7 @@ typedef struct smb_com_lock_req { __le16 NumberOfUnlocks; __le16 NumberOfLocks; __le16 ByteCount; - LOCKING_ANDX_RANGE Locks[1]; + LOCKING_ANDX_RANGE Locks[]; } __attribute__((packed)) LOCK_REQ; /* lock type */ @@ -1116,7 +1116,7 @@ typedef struct smb_com_rename_req { __le16 SearchAttributes; /* target file attributes */ __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII or Unicode */ - unsigned char OldFileName[1]; + unsigned char OldFileName[]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName */ } __attribute__((packed)) RENAME_REQ; @@ -1136,7 +1136,7 @@ typedef struct smb_com_copy_req { __le16 Flags; __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII or Unicode */ - unsigned char OldFileName[1]; + unsigned char OldFileName[]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName string */ } __attribute__((packed)) COPY_REQ; @@ -1146,7 +1146,7 @@ typedef struct smb_com_copy_rsp { __le16 CopyCount; /* number of files copied */ __u16 ByteCount; /* may be zero */ __u8 BufferFormat; /* 0x04 - only present if errored file follows */ - unsigned char ErrorFileName[1]; /* only present if error in copy */ + unsigned char ErrorFileName[]; /* only present if error in copy */ } __attribute__((packed)) COPY_RSP; #define CREATE_HARD_LINK 0x103 @@ -1160,7 +1160,7 @@ typedef struct smb_com_nt_rename_req { /* A5 - also used for create hardlink */ __le32 ClusterCount; __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII or Unicode */ - unsigned char OldFileName[1]; + unsigned char OldFileName[]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName */ } __attribute__((packed)) NT_RENAME_REQ; @@ -1175,7 +1175,7 @@ typedef struct smb_com_delete_file_req { __le16 SearchAttributes; __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char fileName[1]; + unsigned char fileName[]; } __attribute__((packed)) DELETE_FILE_REQ; typedef struct smb_com_delete_file_rsp { @@ -1187,7 +1187,7 @@ typedef struct smb_com_delete_directory_req { struct smb_hdr hdr; /* wct = 0 */ __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char DirName[1]; + unsigned char DirName[]; } __attribute__((packed)) DELETE_DIRECTORY_REQ; typedef struct smb_com_delete_directory_rsp { @@ -1199,7 +1199,7 @@ typedef struct smb_com_create_directory_req { struct smb_hdr hdr; /* wct = 0 */ __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char DirName[1]; + unsigned char DirName[]; } __attribute__((packed)) CREATE_DIRECTORY_REQ; typedef struct smb_com_create_directory_rsp { @@ -1211,7 +1211,7 @@ typedef struct smb_com_query_information_req { struct smb_hdr hdr; /* wct = 0 */ __le16 ByteCount; /* 1 + namelen + 1 */ __u8 BufferFormat; /* 4 = ASCII */ - unsigned char FileName[1]; + unsigned char FileName[]; } __attribute__((packed)) QUERY_INFORMATION_REQ; typedef struct smb_com_query_information_rsp { @@ -1231,7 +1231,7 @@ typedef struct smb_com_setattr_req { __le16 reserved[5]; /* must be zero */ __u16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ - unsigned char fileName[1]; + unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; typedef struct smb_com_setattr_rsp { @@ -1313,7 +1313,7 @@ typedef struct smb_com_transaction_ioctl_req { __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */ __le16 ByteCount; __u8 Pad[3]; - __u8 Data[1]; + __u8 Data[]; } __attribute__((packed)) TRANSACT_IOCTL_REQ; typedef struct smb_com_transaction_compr_ioctl_req { @@ -1431,8 +1431,8 @@ typedef struct smb_com_transaction_change_notify_req { __u8 WatchTree; /* 1 = Monitor subdirectories */ __u8 Reserved2; __le16 ByteCount; -/* __u8 Pad[3];*/ -/* __u8 Data[1];*/ +/* __u8 Pad[3];*/ +/* __u8 Data[];*/ } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ; /* BB eventually change to use generic ntransact rsp struct @@ -1521,7 +1521,7 @@ struct cifs_quota_data { __u64 space_used; __u64 soft_limit; __u64 hard_limit; - char sid[1]; /* variable size? */ + char sid[]; /* variable size? */ } __attribute__((packed)); /* quota sub commands */ @@ -1673,7 +1673,7 @@ typedef struct smb_com_transaction2_qpi_req { __u8 Pad; __le16 InformationLevel; __u32 Reserved4; - char FileName[1]; + char FileName[]; } __attribute__((packed)) TRANSACTION2_QPI_REQ; typedef struct smb_com_transaction2_qpi_rsp { @@ -1706,7 +1706,7 @@ typedef struct smb_com_transaction2_spi_req { __u16 Pad1; __le16 InformationLevel; __u32 Reserved4; - char FileName[1]; + char FileName[]; } __attribute__((packed)) TRANSACTION2_SPI_REQ; typedef struct smb_com_transaction2_spi_rsp { @@ -1813,7 +1813,7 @@ typedef struct smb_com_transaction2_ffirst_req { __le16 SearchFlags; __le16 InformationLevel; __le32 SearchStorageType; - char FileName[1]; + char FileName[]; } __attribute__((packed)) TRANSACTION2_FFIRST_REQ; typedef struct smb_com_transaction2_ffirst_rsp { @@ -2024,7 +2024,7 @@ typedef struct smb_com_transaction2_get_dfs_refer_req { perhaps?) followed by one byte pad - doesn't seem to matter though */ __le16 MaxReferralLevel; - char RequestFileName[1]; + char RequestFileName[]; } __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_REQ; #define DFS_VERSION cpu_to_le16(0x0003) @@ -2053,7 +2053,7 @@ struct get_dfs_referral_rsp { __le16 PathConsumed; __le16 NumberOfReferrals; __le32 DFSFlags; - REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */ + REFERRAL3 referrals[]; /* array of level 3 dfs_referral structures */ /* followed by the strings pointed to by the referral structures */ } __packed; @@ -2292,7 +2292,10 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ __le32 Mode; __le32 AlignmentRequirement; __le32 FileNameLength; - char FileName[1]; + union { + char __pad; + DECLARE_FLEX_ARRAY(char, FileName); + }; } __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */ typedef struct { @@ -2330,7 +2333,7 @@ typedef struct { } __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ typedef struct { - char LinkDest[1]; + DECLARE_FLEX_ARRAY(char, LinkDest); } __attribute__((packed)) FILE_UNIX_LINK_INFO; /* level 0x201 QPathInfo */ /* The following three structures are needed only for @@ -2380,7 +2383,7 @@ struct file_end_of_file_info { } __attribute__((packed)); /* size info, level 0x104 for set, 0x106 for query */ struct file_alt_name_info { - __u8 alt_name[1]; + DECLARE_FLEX_ARRAY(__u8, alt_name); } __attribute__((packed)); /* level 0x0108 */ struct file_stream_info { @@ -2490,7 +2493,10 @@ typedef struct { __le32 NextEntryOffset; __u32 ResumeKey; /* as with FileIndex - no need to convert */ FILE_UNIX_BASIC_INFO basic; - char FileName[1]; + union { + char __pad; + DECLARE_FLEX_ARRAY(char, FileName); + }; } __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */ typedef struct { @@ -2504,7 +2510,7 @@ typedef struct { __le64 AllocationSize; __le32 ExtFileAttributes; __le32 FileNameLength; - char FileName[1]; + char FileName[]; } __attribute__((packed)) FILE_DIRECTORY_INFO; /* level 0x101 FF resp data */ typedef struct { @@ -2519,7 +2525,7 @@ typedef struct { __le32 ExtFileAttributes; __le32 FileNameLength; __le32 EaSize; /* length of the xattrs */ - char FileName[1]; + char FileName[]; } __attribute__((packed)) FILE_FULL_DIRECTORY_INFO; /* level 0x102 rsp data */ typedef struct { @@ -2536,7 +2542,7 @@ typedef struct { __le32 EaSize; /* EA size */ __le32 Reserved; __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ - char FileName[1]; + char FileName[]; } __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */ typedef struct { @@ -2554,7 +2560,7 @@ typedef struct { __u8 ShortNameLength; __u8 Reserved; __u8 ShortName[24]; - char FileName[1]; + char FileName[]; } __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */ typedef struct { @@ -2569,7 +2575,7 @@ typedef struct { __le32 AllocationSize; __le16 Attributes; /* verify not u32 */ __u8 FileNameLength; - char FileName[1]; + char FileName[]; } __attribute__((packed)) FIND_FILE_STANDARD_INFO; /* level 0x1 FF resp data */ @@ -2579,16 +2585,6 @@ struct win_dev { __le64 minor; } __attribute__((packed)); -struct gea { - unsigned char name_len; - char name[1]; -} __attribute__((packed)); - -struct gealist { - unsigned long list_len; - struct gea list[1]; -} __attribute__((packed)); - struct fea { unsigned char EA_flags; __u8 name_len; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 5990bdbae598..9a1f1913fb59 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -497,7 +497,7 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) FIND_FILE_STANDARD_INFO *pfData; pfData = (FIND_FILE_STANDARD_INFO *)pDirInfo; - new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) + + new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) + 1 + pfData->FileNameLength; } else { u32 next_offset = le32_to_cpu(pDirInfo->NextEntryOffset); @@ -515,9 +515,9 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) new_entry, end_of_smb, old_entry); return NULL; } else if (((level == SMB_FIND_FILE_INFO_STANDARD) && - (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) + (new_entry + sizeof(FIND_FILE_STANDARD_INFO) + 1 > end_of_smb)) || ((level != SMB_FIND_FILE_INFO_STANDARD) && - (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { + (new_entry + sizeof(FILE_DIRECTORY_INFO) + 1 > end_of_smb))) { cifs_dbg(VFS, "search entry %p extends after end of SMB %p\n", new_entry, end_of_smb); return NULL; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index cc425a616899..e15bf116c755 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5073,10 +5073,10 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, switch (srch_inf->info_level) { case SMB_FIND_FILE_DIRECTORY_INFO: - info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1; + info_buf_size = sizeof(FILE_DIRECTORY_INFO); break; case SMB_FIND_FILE_ID_FULL_DIR_INFO: - info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1; + info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO); break; case SMB_FIND_FILE_POSIX_INFO: /* note that posix payload are variable size */ diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 8d011fedecd0..3a13b9b56452 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -373,7 +373,7 @@ struct smb2_file_id_extd_directory_info { __le32 EaSize; /* EA size */ __le32 ReparsePointTag; /* valid if FILE_ATTR_REPARSE_POINT set in FileAttributes */ __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit */ - char FileName[1]; + char FileName[]; } __packed; /* level 60 */ extern char smb2_padding[7]; From 34410fcad91b3f1f70d444e74fb9d7e3cd63d088 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Apr 2024 12:53:46 +0200 Subject: [PATCH 492/553] Revert "crypto: api - Disallow identical driver names" This reverts commit 680eb0a99336f7b21ff149bc57579d059421c5de which is commit 27016f75f5ed47e2d8e0ca75a8ff1f40bc1a5e27 upstream. It is reported to cause problems in older kernels due to some crypto drivers having the same name, so revert it here to fix the problems. Link: https://lore.kernel.org/r/aceda6e2-cefb-4146-aef8-ff4bafa56e56@roeck-us.net Reported-by: Guenter Roeck Cc: Ovidiu Panait Cc: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index c73d1359b9d4..5dc9ccdd5a51 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -290,7 +290,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) } if (!strcmp(q->cra_driver_name, alg->cra_name) || - !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } From 539a2b995a4ed93125cb0efae0f793b00ab2158b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 3 Apr 2024 08:43:12 -0700 Subject: [PATCH 493/553] virtio_net: Do not send RSS key if it is not supported commit 059a49aa2e25c58f90b50151f109dd3c4cdb3a47 upstream. There is a bug when setting the RSS options in virtio_net that can break the whole machine, getting the kernel into an infinite loop. Running the following command in any QEMU virtual machine with virtionet will reproduce this problem: # ethtool -X eth0 hfunc toeplitz This is how the problem happens: 1) ethtool_set_rxfh() calls virtnet_set_rxfh() 2) virtnet_set_rxfh() calls virtnet_commit_rss_command() 3) virtnet_commit_rss_command() populates 4 entries for the rss scatter-gather 4) Since the command above does not have a key, then the last scatter-gatter entry will be zeroed, since rss_key_size == 0. sg_buf_size = vi->rss_key_size; 5) This buffer is passed to qemu, but qemu is not happy with a buffer with zero length, and do the following in virtqueue_map_desc() (QEMU function): if (!sz) { virtio_error(vdev, "virtio: zero sized buffers are not allowed"); 6) virtio_error() (also QEMU function) set the device as broken vdev->broken = true; 7) Qemu bails out, and do not repond this crazy kernel. 8) The kernel is waiting for the response to come back (function virtnet_send_command()) 9) The kernel is waiting doing the following : while (!virtqueue_get_buf(vi->cvq, &tmp) && !virtqueue_is_broken(vi->cvq)) cpu_relax(); 10) None of the following functions above is true, thus, the kernel loops here forever. Keeping in mind that virtqueue_is_broken() does not look at the qemu `vdev->broken`, so, it never realizes that the vitio is broken at QEMU side. Fix it by not sending RSS commands if the feature is not available in the device. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Cc: stable@vger.kernel.org Cc: qemu-devel@nongnu.org Signed-off-by: Breno Leitao Reviewed-by: Heng Qi Reviewed-by: Xuan Zhuo Signed-off-by: David S. Miller Signed-off-by: Konstantin Ovsepian Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 45f1a871b7da..32cddb633793 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2948,19 +2948,35 @@ static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfu static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct virtnet_info *vi = netdev_priv(dev); + bool update = false; int i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; if (indir) { + if (!vi->has_rss) + return -EOPNOTSUPP; + for (i = 0; i < vi->rss_indir_table_size; ++i) vi->ctrl->rss.indirection_table[i] = indir[i]; + update = true; } - if (key) - memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); - virtnet_commit_rss_command(vi); + if (key) { + /* If either _F_HASH_REPORT or _F_RSS are negotiated, the + * device provides hash calculation capabilities, that is, + * hash_key is configured. + */ + if (!vi->has_rss && !vi->has_rss_hash_report) + return -EOPNOTSUPP; + + memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); + update = true; + } + + if (update) + virtnet_commit_rss_command(vi); return 0; } @@ -3852,13 +3868,15 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; - if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { vi->has_rss = true; - if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); + } + + if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); From 0c42f7e039aba3de6d7dbf92da708e2b2ecba557 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 10 Apr 2024 17:14:41 +0800 Subject: [PATCH 494/553] fork: defer linking file vma until vma is fully initialized commit 35e351780fa9d8240dd6f7e4f245f9ea37e96c19 upstream. Thorvald reported a WARNING [1]. And the root cause is below race: CPU 1 CPU 2 fork hugetlbfs_fallocate dup_mmap hugetlbfs_punch_hole i_mmap_lock_write(mapping); vma_interval_tree_insert_after -- Child vma is visible through i_mmap tree. i_mmap_unlock_write(mapping); hugetlb_dup_vma_private -- Clear vma_lock outside i_mmap_rwsem! i_mmap_lock_write(mapping); hugetlb_vmdelete_list vma_interval_tree_foreach hugetlb_vma_trylock_write -- Vma_lock is cleared. tmp->vm_ops->open -- Alloc new vma_lock outside i_mmap_rwsem! hugetlb_vma_unlock_write -- Vma_lock is assigned!!! i_mmap_unlock_write(mapping); hugetlb_dup_vma_private() and hugetlb_vm_op_open() are called outside i_mmap_rwsem lock while vma lock can be used in the same time. Fix this by deferring linking file vma until vma is fully initialized. Those vmas should be initialized first before they can be used. Link: https://lkml.kernel.org/r/20240410091441.3539905-1-linmiaohe@huawei.com Fixes: 8d9bfb260814 ("hugetlb: add vma based lock for pmd sharing") Signed-off-by: Miaohe Lin Reported-by: Thorvald Natvig Closes: https://lore.kernel.org/linux-mm/20240129161735.6gmjsswx62o4pbja@revolver/T/ [1] Reviewed-by: Jane Chu Cc: Christian Brauner Cc: Heiko Carstens Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Muchun Song Cc: Oleg Nesterov Cc: Peng Zhang Cc: Tycho Andersen Cc: Signed-off-by: Andrew Morton Signed-off-by: Miaohe Lin Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 85617928041c..7e9a5919299b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -662,6 +662,15 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); + /* + * Copy/update hugetlb private vma information. + */ + if (is_vm_hugetlb_page(tmp)) + hugetlb_dup_vma_private(tmp); + + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + file = tmp->vm_file; if (file) { struct address_space *mapping = file->f_mapping; @@ -678,12 +687,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, i_mmap_unlock_write(mapping); } - /* - * Copy/update hugetlb private vma information. - */ - if (is_vm_hugetlb_page(tmp)) - hugetlb_dup_vma_private(tmp); - /* Link the vma into the MT */ mas.index = tmp->vm_start; mas.last = tmp->vm_end - 1; @@ -695,9 +698,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(tmp, mpnt); - if (tmp->vm_ops && tmp->vm_ops->open) - tmp->vm_ops->open(tmp); - if (retval) goto loop_out; } From 6a190e7ca4e503635b9399e3c448e93c7ca1f75b Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Sun, 21 Apr 2024 21:17:28 +0200 Subject: [PATCH 495/553] x86/cpu: Fix check for RDPKRU in __show_regs() commit b53c6bd5d271d023857174b8fd3e32f98ae51372 upstream. cpu_feature_enabled(X86_FEATURE_OSPKE) does not necessarily reflect whether CR4.PKE is set on the CPU. In particular, they may differ on non-BSP CPUs before setup_pku() is executed. In this scenario, RDPKRU will #UD causing the system to hang. Fix by checking CR4 for PKE enablement which is always correct for the current CPU. The scenario happens by inserting a WARN* before setup_pku() in identiy_cpu() or some other diagnostic which would lead to calling __show_regs(). [ bp: Massage commit message. ] Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240421191728.32239-1-bp@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7f94dbbc397b..a0d3059bee3d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -137,7 +137,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + if (cr4 & X86_CR4_PKE) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } From 9f882077f5180a280e96ddf423f2814f90c4f7bf Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 8 Mar 2024 09:36:31 +0000 Subject: [PATCH 496/553] rust: don't select CONSTRUCTORS commit 7d49f53af4b988b188d3932deac2c9c80fd7d9ce upstream. This was originally part of commit 4b9a68f2e59a0 ("rust: add support for static synchronisation primitives") from the old Rust branch, which used module constructors to initialize globals containing various synchronisation primitives with pin-init. That commit has never been upstreamed, but the `select CONSTRUCTORS` statement ended up being included in the patch that initially added Rust support to the Linux Kernel. We are not using module constructors, so let's remove the select. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Link: https://lore.kernel.org/r/20240308-constructors-v1-1-4c811342391c@google.com Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index b63dce6706c5..ffda34c42456 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1924,7 +1924,6 @@ config RUST depends on !GCC_PLUGINS depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE - select CONSTRUCTORS help Enables Rust support in the kernel. From 4e6cd21498c2d81b213c78a8e14db31400d9a900 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 4 Apr 2024 15:17:02 +0100 Subject: [PATCH 497/553] rust: make mutually exclusive with CFI_CLANG commit 8933cf4651e02853ca679be7b2d978dfcdcc5e0c upstream. On RISC-V and arm64, and presumably x86, if CFI_CLANG is enabled, loading a rust module will trigger a kernel panic. Support for sanitisers, including kcfi (CFI_CLANG), is in the works, but for now they're nightly-only options in rustc. Make RUST depend on !CFI_CLANG to prevent configuring a kernel without symmetrical support for kfi. [ Matthew Maurer writes [1]: This patch is fine by me - the last patch needed for KCFI to be functional in Rust just landed upstream last night, so we should revisit this (in the form of enabling it) once we move to `rustc-1.79.0` or later. Ramon de C Valle also gave feedback [2] on the status of KCFI for Rust and created a tracking issue [3] in upstream Rust. - Miguel ] Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Cc: stable@vger.kernel.org Signed-off-by: Conor Dooley Acked-by: Nathan Chancellor Link: https://lore.kernel.org/rust-for-linux/CAGSQo024u1gHJgzsO38Xg3c4or+JupoPABQx_+0BLEpPg0cOEA@mail.gmail.com/ [1] Link: https://lore.kernel.org/rust-for-linux/CAOcBZOS2kPyH0Dm7Fuh4GC3=v7nZhyzBj_-dKu3PfAnrHZvaxg@mail.gmail.com/ [2] Link: https://github.com/rust-lang/rust/issues/123479 [3] Link: https://lore.kernel.org/r/20240404-providing-emporium-e652e359c711@spud [ Added feedback from the list, links, and used Cc for the tag. ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index ffda34c42456..537f01eba2e6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1920,6 +1920,7 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE + depends on !CFI_CLANG depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT From c7882362897ba1d8248e6da07bea058550336b71 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 1 Apr 2024 11:24:17 -0700 Subject: [PATCH 498/553] Bluetooth: Fix type of len in {l2cap,sco}_sock_getsockopt_old() commit 9bf4e919ccad613b3596eebf1ff37b05b6405307 upstream. After an innocuous optimization change in LLVM main (19.0.0), x86_64 allmodconfig (which enables CONFIG_KCSAN / -fsanitize=thread) fails to build due to the checks in check_copy_size(): In file included from net/bluetooth/sco.c:27: In file included from include/linux/module.h:13: In file included from include/linux/stat.h:19: In file included from include/linux/time.h:60: In file included from include/linux/time32.h:13: In file included from include/linux/timex.h:67: In file included from arch/x86/include/asm/timex.h:6: In file included from arch/x86/include/asm/tsc.h:10: In file included from arch/x86/include/asm/msr.h:15: In file included from include/linux/percpu.h:7: In file included from include/linux/smp.h:118: include/linux/thread_info.h:244:4: error: call to '__bad_copy_from' declared with 'error' attribute: copy source size is too small 244 | __bad_copy_from(); | ^ The same exact error occurs in l2cap_sock.c. The copy_to_user() statements that are failing come from l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This does not occur with GCC with or without KCSAN or Clang without KCSAN enabled. len is defined as an 'int' because it is assigned from '__user int *optlen'. However, it is clamped against the result of sizeof(), which has a type of 'size_t' ('unsigned long' for 64-bit platforms). This is done with min_t() because min() requires compatible types, which results in both len and the result of sizeof() being casted to 'unsigned int', meaning len changes signs and the result of sizeof() is truncated. From there, len is passed to copy_to_user(), which has a third parameter type of 'unsigned long', so it is widened and changes signs again. This excessive casting in combination with the KCSAN instrumentation causes LLVM to fail to eliminate the __bad_copy_from() call, failing the build. The official recommendation from LLVM developers is to consistently use long types for all size variables to avoid the unnecessary casting in the first place. Change the type of len to size_t in both l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This clears up the error while allowing min_t() to be replaced with min(), resulting in simpler code with no casts and fewer implicit conversions. While len is a different type than optlen now, it should result in no functional change because the result of sizeof() will clamp all values of optlen in the same manner as before. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2007 Link: https://github.com/llvm/llvm-project/issues/85647 Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 7 ++++--- net/bluetooth/sco.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4198ca66fbe1..e3c7029ec8a6 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -457,7 +457,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; struct l2cap_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -504,7 +505,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mode 0x%2.2x", chan->mode); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -554,7 +555,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 2e9137c539a4..4a6bf60f3e7a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -971,7 +971,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct sco_options opts; struct sco_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; BT_DBG("sk %p", sk); @@ -993,7 +994,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mtu %u", opts.mtu); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *)&opts, len)) err = -EFAULT; @@ -1011,7 +1012,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *)&cinfo, len)) err = -EFAULT; From e6dd0117e947da0847c469a12ac9e11512d13be2 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 29 Mar 2024 10:34:39 +0800 Subject: [PATCH 499/553] Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0bda:0x4853 commit d1a5a7eede2977da3d2002d5ea3b519019cc1a98 upstream. Add the support ID(0x0bda, 0x4853) to usb_device_id table for Realtek RTL8852BE. Without this change the device utilizes an obsolete version of the firmware that is encoded in it rather than the updated Realtek firmware and config files from the firmware directory. The latter files implement many new features. The device table is as follows: T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=4853 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: stable@vger.kernel.org Signed-off-by: Larry Finger Signed-off-by: WangYuli Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 954f7f3b5cc3..6a772b955d69 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -535,6 +535,8 @@ static const struct usb_device_id blacklist_table[] = { /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | From e60502b907be350c518819297b565007a94c706d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Apr 2024 15:57:47 +0200 Subject: [PATCH 500/553] Bluetooth: qca: fix NULL-deref on non-serdev suspend commit 73e87c0a49fda31d7b589edccf4c72e924411371 upstream. Qualcomm ROME controllers can be registered from the Bluetooth line discipline and in this case the HCI UART serdev pointer is NULL. Add the missing sanity check to prevent a NULL-pointer dereference when wakeup() is called for a non-serdev controller during suspend. Just return true for now to restore the original behaviour and address the crash with pre-6.2 kernels, which do not have commit e9b3e5b8c657 ("Bluetooth: hci_qca: only assign wakeup with serial port support") that causes the crash to happen already at setup() time. Fixes: c1a74160eaf1 ("Bluetooth: hci_qca: Add device_may_wakeup support") Cc: stable@vger.kernel.org # 5.13 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca6065297a7b..179278b801eb 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1645,6 +1645,9 @@ static bool qca_wakeup(struct hci_dev *hdev) struct hci_uart *hu = hci_get_drvdata(hdev); bool wakeup; + if (!hu->serdev) + return true; + /* BT SoC attached through the serial bus is handled by the serdev driver. * So we need to use the device handle of the serdev driver to get the * status of device may wakeup. From a957ea5aa3d3518067a1ba32c6127322ad348d20 Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Thu, 21 Mar 2024 14:30:01 +0000 Subject: [PATCH 501/553] mmc: sdhci-msm: pervent access to suspended controller commit f8def10f73a516b771051a2f70f2f0446902cb4f upstream. Generic sdhci code registers LED device and uses host->runtime_suspended flag to protect access to it. The sdhci-msm driver doesn't set this flag, which causes a crash when LED is accessed while controller is runtime suspended. Fix this by setting the flag correctly. Cc: stable@vger.kernel.org Fixes: 67e6db113c90 ("mmc: sdhci-msm: Add pm_runtime and system PM support") Signed-off-by: Mantas Pucka Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240321-sdhci-mmc-suspend-v1-1-fbc555a64400@8devices.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-msm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index a5ab2af3e520..e37fb25577c0 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2831,6 +2831,11 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = true; + spin_unlock_irqrestore(&host->lock, flags); /* Drop the performance vote */ dev_pm_opp_set_rate(dev, 0); @@ -2845,6 +2850,7 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; int ret; ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), @@ -2863,7 +2869,15 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) dev_pm_opp_set_rate(dev, msm_host->clk_rate); - return sdhci_msm_ice_resume(msm_host); + ret = sdhci_msm_ice_resume(msm_host); + if (ret) + return ret; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = false; + spin_unlock_irqrestore(&host->lock, flags); + + return ret; } static const struct dev_pm_ops sdhci_msm_pm_ops = { From 2b8bf690e05c5ef134aa9709b0a622f01e6eb337 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 23 Apr 2024 20:41:22 -0600 Subject: [PATCH 502/553] smb: client: Fix struct_group() usage in __packed structs commit 9a1f1d04f63c59550a5364858b46eeffdf03e8d6 upstream. Use struct_group_attr() in __packed structs, instead of struct_group(). Below you can see the pahole output before/after changes: pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ }; /* 0 56 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } network_open_info; /* 0 56 */ }; /* 0 56 */ __le32 Reserved; /* 56 4 */ /* size: 60, cachelines: 1, members: 2 */ /* last cacheline: 60 bytes */ } __attribute__((__packed__)); pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)); /* 0 52 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)) network_open_info; /* 0 52 */ }; /* 0 52 */ __le32 Reserved; /* 52 4 */ /* size: 56, cachelines: 1, members: 2 */ /* last cacheline: 56 bytes */ }; pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ }; /* 48 40 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } common_attributes; /* 48 40 */ }; /* 48 40 */ ... /* size: 111, cachelines: 2, members: 14 */ /* last cacheline: 47 bytes */ } __attribute__((__packed__)); pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)); /* 48 36 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)) common_attributes; /* 48 36 */ }; /* 48 36 */ ... /* size: 107, cachelines: 2, members: 14 */ /* last cacheline: 43 bytes */ } __attribute__((__packed__)); pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ }; /* 0 40 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } common_attributes; /* 0 40 */ }; /* 0 40 */ ... /* size: 113, cachelines: 2, members: 17 */ /* last cacheline: 49 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)); /* 0 36 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)) common_attributes; /* 0 36 */ }; /* 0 36 */ ... /* size: 109, cachelines: 2, members: 17 */ /* last cacheline: 45 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Reviewed-by: Namjae Jeon Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifspdu.h | 4 ++-- fs/smb/client/smb2pdu.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 94f86374f4b7..9cb457706334 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; @@ -2270,7 +2270,7 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 3a13b9b56452..2823526b66f7 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -339,7 +339,7 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; From c7a4bca289e50bb4b2650f845c41bb3e453f4c66 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 12:49:50 -0500 Subject: [PATCH 503/553] smb3: fix lock ordering potential deadlock in cifs_sync_mid_result commit 8861fd5180476f45f9e8853db154600469a0284f upstream. Coverity spotted that the cifs_sync_mid_result function could deadlock "Thread deadlock (ORDER_REVERSAL) lock_order: Calling spin_lock acquires lock TCP_Server_Info.srv_lock while holding lock TCP_Server_Info.mid_lock" Addresses-Coverity: 1590401 ("Thread deadlock (ORDER_REVERSAL)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index df44acaec9ae..338b34c99b2d 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -931,12 +931,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } + spin_unlock(&server->mid_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; + goto sync_mid_done; } spin_unlock(&server->mid_lock); +sync_mid_done: release_mid(mid); return rc; } From 0561b65fbd53d3e788c5b0222d9112ca016fd6a1 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 18 Mar 2024 11:59:02 +0100 Subject: [PATCH 504/553] HID: i2c-hid: remove I2C_HID_READ_PENDING flag to prevent lock-up commit 9c0f59e47a90c54d0153f8ddc0f80d7a36207d0e upstream. The flag I2C_HID_READ_PENDING is used to serialize I2C operations. However, this is not necessary, because I2C core already has its own locking for that. More importantly, this flag can cause a lock-up: if the flag is set in i2c_hid_xfer() and an interrupt happens, the interrupt handler (i2c_hid_irq) will check this flag and return immediately without doing anything, then the interrupt handler will be invoked again in an infinite loop. Since interrupt handler is an RT task, it takes over the CPU and the flag-clearing task never gets scheduled, thus we have a lock-up. Delete this unnecessary flag. Reported-and-tested-by: Eva Kurchatova Closes: https://lore.kernel.org/r/CA+eeCSPUDpUg76ZO8dszSbAGn+UHjcyv8F1J-CUPVARAzEtW9w@mail.gmail.com Fixes: 4a200c3b9a40 ("HID: i2c-hid: introduce HID over i2c specification implementation") Cc: Signed-off-by: Nam Cao Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 969f8eb086f0..0b05bb1e4410 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -61,7 +61,6 @@ /* flags */ #define I2C_HID_STARTED 0 #define I2C_HID_RESET_PENDING 1 -#define I2C_HID_READ_PENDING 2 #define I2C_HID_PWR_ON 0x00 #define I2C_HID_PWR_SLEEP 0x01 @@ -193,15 +192,10 @@ static int i2c_hid_xfer(struct i2c_hid *ihid, msgs[n].len = recv_len; msgs[n].buf = recv_buf; n++; - - set_bit(I2C_HID_READ_PENDING, &ihid->flags); } ret = i2c_transfer(client->adapter, msgs, n); - if (recv_len) - clear_bit(I2C_HID_READ_PENDING, &ihid->flags); - if (ret != n) return ret < 0 ? ret : -EIO; @@ -569,9 +563,6 @@ static irqreturn_t i2c_hid_irq(int irq, void *dev_id) { struct i2c_hid *ihid = dev_id; - if (test_bit(I2C_HID_READ_PENDING, &ihid->flags)) - return IRQ_HANDLED; - i2c_hid_get_input(ihid); return IRQ_HANDLED; From 8bdbcfaf3eac42f98e5486b3d7e130fa287811f6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Apr 2024 10:45:47 +0200 Subject: [PATCH 505/553] btrfs: fix information leak in btrfs_ioctl_logical_to_ino() commit 2f7ef5bb4a2f3e481ef05fab946edb97c84f67cf upstream. Syzbot reported the following information leak for in btrfs_ioctl_logical_to_ino(): BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x110 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x110 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] btrfs_ioctl_logical_to_ino+0x440/0x750 fs/btrfs/ioctl.c:3499 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: __kmalloc_large_node+0x231/0x370 mm/slub.c:3921 __do_kmalloc_node mm/slub.c:3954 [inline] __kmalloc_node+0xb07/0x1060 mm/slub.c:3973 kmalloc_node include/linux/slab.h:648 [inline] kvmalloc_node+0xc0/0x2d0 mm/util.c:634 kvmalloc include/linux/slab.h:766 [inline] init_data_container+0x49/0x1e0 fs/btrfs/backref.c:2779 btrfs_ioctl_logical_to_ino+0x17c/0x750 fs/btrfs/ioctl.c:3480 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Bytes 40-65535 of 65536 are uninitialized Memory access of size 65536 starts at ffff888045a40000 This happens, because we're copying a 'struct btrfs_data_container' back to user-space. This btrfs_data_container is allocated in 'init_data_container()' via kvmalloc(), which does not zero-fill the memory. Fix this by using kvzalloc() which zeroes out the memory on allocation. CC: stable@vger.kernel.org # 4.14+ Reported-by: Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/backref.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 18cf801ab590..23d0372e8882 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2475,20 +2475,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) size_t alloc_bytes; alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); - data = kvmalloc(alloc_bytes, GFP_KERNEL); + data = kvzalloc(alloc_bytes, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - if (total_bytes >= sizeof(*data)) { + if (total_bytes >= sizeof(*data)) data->bytes_left = total_bytes - sizeof(*data); - data->bytes_missing = 0; - } else { + else data->bytes_missing = sizeof(*data) - total_bytes; - data->bytes_left = 0; - } - - data->elem_cnt = 0; - data->elem_missed = 0; return data; } From 38f17d1fbb5bfb56ca1419e2d06376d57a9396f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:54 -0700 Subject: [PATCH 506/553] cpu: Re-enable CPU mitigations by default for !X86 architectures commit fe42754b94a42d08cf9501790afc25c4f6a5f631 upstream. Rename x86's to CPU_MITIGATIONS, define it in generic code, and force it on for all architectures exception x86. A recent commit to turn mitigations off by default if SPECULATION_MITIGATIONS=n kinda sorta missed that "cpu_mitigations" is completely generic, whereas SPECULATION_MITIGATIONS is x86-specific. Rename x86's SPECULATIVE_MITIGATIONS instead of keeping both and have it select CPU_MITIGATIONS, as having two configs for the same thing is unnecessary and confusing. This will also allow x86 to use the knob to manage mitigations that aren't strictly related to speculative execution. Use another Kconfig to communicate to common code that CPU_MITIGATIONS is already defined instead of having x86's menu depend on the common CPU_MITIGATIONS. This allows keeping a single point of contact for all of x86's mitigations, and it's not clear that other architectures *want* to allow disabling mitigations at compile-time. Fixes: f337a6a21e2f ("x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n") Closes: https://lkml.kernel.org/r/20240413115324.53303a68%40canb.auug.org.au Reported-by: Stephen Rothwell Reported-by: Michael Ellerman Reported-by: Geert Uytterhoeven Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Acked-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240420000556.2645001-2-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 8 ++++++++ arch/x86/Kconfig | 11 ++++++----- kernel/cpu.c | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index f99fd9a4ca77..e959abf969ec 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,14 @@ # source "arch/$(SRCARCH)/Kconfig" +config ARCH_CONFIGURES_CPU_MITIGATIONS + bool + +if !ARCH_CONFIGURES_CPU_MITIGATIONS +config CPU_MITIGATIONS + def_bool y +endif + menu "General architecture-dependent options" config CRASH_CORE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5f7a86f240db..49cea5b81649 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CONFIGURES_CPU_MITIGATIONS select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 @@ -2449,17 +2450,17 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) -menuconfig SPECULATION_MITIGATIONS - bool "Mitigations for speculative execution vulnerabilities" +menuconfig CPU_MITIGATIONS + bool "Mitigations for CPU vulnerabilities" default y help - Say Y here to enable options which enable mitigations for - speculative execution hardware vulnerabilities. + Say Y here to enable options which enable mitigations for hardware + vulnerabilities (usually related to speculative execution). If you say N, all mitigations will be disabled. You really should know what you are doing to say so. -if SPECULATION_MITIGATIONS +if CPU_MITIGATIONS config PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" diff --git a/kernel/cpu.c b/kernel/cpu.c index 2c44dd12a158..e0e09b700b43 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2788,8 +2788,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; + IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { From 94021d1d2b57f2b45638ad67babb29bd30518c3f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 507/553] LoongArch: Fix callchain parse error with kernel tracepoint events commit d3119bc985fb645ad3b2a9cf9952c1d56d9daaa3 upstream. In order to fix perf's callchain parse error for LoongArch, we implement perf_arch_fetch_caller_regs() which fills several necessary registers used for callchain unwinding, including sp, fp, and era. This is similar to the following commits. commit b3eac0265bf6: ("arm: perf: Fix callchain parse error with kernel tracepoint events") commit 5b09a094f2fb: ("arm64: perf: Fix callchain parse error with kernel tracepoint events") commit 9a7e8ec0d4cc: ("riscv: perf: Fix callchain parse error with kernel tracepoint events") Test with commands: perf record -e sched:sched_switch -g --call-graph dwarf perf report Without this patch: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 43.41% 43.41% swapper [unknown] [k] 0000000000000000 10.94% 10.94% loong-container [unknown] [k] 0000000000000000 | |--5.98%--0x12006ba38 | |--2.56%--0x12006bb84 | --2.40%--0x12006b6b8 With this patch, callchain can be parsed correctly: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 47.57% 47.57% swapper [kernel.vmlinux] [k] __schedule | ---__schedule 26.76% 26.76% loong-container [kernel.vmlinux] [k] __schedule | |--13.78%--0x12006ba38 | | | |--9.19%--__schedule | | | --4.59%--handle_syscall | do_syscall | sys_futex | do_futex | futex_wait | futex_wait_queue_me | hrtimer_start_range_ns | __schedule | |--8.38%--0x12006bb84 | handle_syscall | do_syscall | sys_epoll_pwait | do_epoll_wait | schedule_hrtimeout_range_clock | hrtimer_start_range_ns | __schedule | --4.59%--0x12006b6b8 handle_syscall do_syscall sys_nanosleep hrtimer_nanosleep do_nanosleep hrtimer_start_range_ns __schedule Cc: stable@vger.kernel.org Fixes: b37042b2bb7cd751f0 ("LoongArch: Add perf events support") Reported-by: Youling Tang Suggested-by: Youling Tang Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/include/asm/perf_event.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index 2a35a0bc2aaa..52b638059e40 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -7,6 +7,14 @@ #ifndef __LOONGARCH_PERF_EVENT_H__ #define __LOONGARCH_PERF_EVENT_H__ +#include + #define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->csr_era = (__ip); \ + (regs)->regs[3] = current_stack_pointer; \ + (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \ +} + #endif /* __LOONGARCH_PERF_EVENT_H__ */ From ba9bcc0e58f3e51dd0e2e018bae221d99d53c737 Mon Sep 17 00:00:00 2001 From: Jiantao Shan Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 508/553] LoongArch: Fix access error when read fault on a write-only VMA commit efb44ff64c95340b06331fc48634b99efc9dd77c upstream. As with most architectures, allow handling of read faults in VMAs that have VM_WRITE but without VM_READ (WRITE implies READ). Otherwise, reading before writing a write-only memory will error while reading after writing everything is fine. BTW, move the VM_EXEC judgement before VM_READ/VM_WRITE to make logic a little clearer. Cc: stable@vger.kernel.org Fixes: 09cfefb7fa70c3af01 ("LoongArch: Add memory management") Signed-off-by: Jiantao Shan Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index b829ab911a17..007718d51f09 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -193,10 +193,10 @@ good_area: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { - if (!(vma->vm_flags & VM_READ) && address != exception_era(regs)) - goto bad_area; if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs)) + goto bad_area; } /* From ffddf569e35ec823b6af0bde5a2a5f32dcf7b1c0 Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:42 +0100 Subject: [PATCH 509/553] arm64: dts: rockchip: enable internal pull-up for Q7_THRM# on RK3399 Puma commit 0ac417b8f124427c90ec8c2ef4f632b821d924cc upstream. Q7_THRM# pin is connected to a diode on the module which is used as a level shifter, and the pin have a pull-down enabled by default. We need to configure it to internal pull-up, other- wise whenever the pin is configured as INPUT and we try to control it externally the value will always remain zero. Signed-off-by: Iskander Amara Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Reviewed-by: Quentin Schulz Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240308085243.69903-1-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index a77f922107c2..937a15005eb0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -407,6 +407,16 @@ }; &pinctrl { + pinctrl-names = "default"; + pinctrl-0 = <&q7_thermal_pin>; + + gpios { + q7_thermal_pin: q7-thermal-pin { + rockchip,pins = + <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>; + }; + }; + i2c8 { i2c8_xfer_a: i2c8-xfer { rockchip,pins = From 404b0ae432cc3f50c06053b36dec426b2db191a3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Apr 2024 21:20:56 -0400 Subject: [PATCH 510/553] drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9792b7cc18aaa0c2acae6af5d0acf249bcb1ab0d upstream. This avoids a potential conflict with firmwares with the newer HDP flush mechanism. Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 856db876af14..c7af36370b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -345,17 +345,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** From 64f9d8ac2cd43b1df11c8fc4f9742993748e36cc Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 11:32:34 -0400 Subject: [PATCH 511/553] drm/amdgpu: Fix leak when GPU memory allocation fails commit 25e9227c6afd200bed6774c866980b8e36d033af upstream. Free the sync object if the memory allocation fails for any reason. Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 260e6a3316db..7d5fbaaba72f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1779,6 +1779,7 @@ err_node_allow: err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); From dd681710ab77c8beafe2e263064cb1bd0e2d6ca9 Mon Sep 17 00:00:00 2001 From: Guanrui Huang Date: Thu, 18 Apr 2024 14:10:52 +0800 Subject: [PATCH 512/553] irqchip/gic-v3-its: Prevent double free on error commit c26591afd33adce296c022e3480dea4282b7ef91 upstream. The error handling path in its_vpe_irq_domain_alloc() causes a double free when its_vpe_init() fails after successfully allocating at least one interrupt. This happens because its_vpe_irq_domain_free() frees the interrupts along with the area bitmap and the vprop_page and its_vpe_irq_domain_alloc() subsequently frees the area bitmap and the vprop_page again. Fix this by unconditionally invoking its_vpe_irq_domain_free() which handles all cases correctly and by removing the bitmap/vprop_page freeing from its_vpe_irq_domain_alloc(). [ tglx: Massaged change log ] Fixes: 7d75bbb4bc1a ("irqchip/gic-v3-its: Add VPE irq domain allocation/teardown") Signed-off-by: Guanrui Huang Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240418061053.96803-2-guanrui.huang@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4d03fb3a8246..f9ab5cfc9b94 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4535,13 +4535,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq set_bit(i, bitmap); } - if (err) { - if (i > 0) - its_vpe_irq_domain_free(domain, virq, i); - - its_lpi_free(bitmap, base, nr_ids); - its_free_prop_table(vprop_page); - } + if (err) + its_vpe_irq_domain_free(domain, virq, i); return err; } From 01fc53be672acae37e611c80cc0b4f3939584de3 Mon Sep 17 00:00:00 2001 From: Jarred White Date: Fri, 1 Mar 2024 11:25:59 -0800 Subject: [PATCH 513/553] ACPI: CPPC: Use access_width over bit_width for system memory accesses commit 2f4a4d63a193be6fd530d180bb13c3592052904c upstream. To align with ACPI 6.3+, since bit_width can be any 8-bit value, it cannot be depended on to be always on a clean 8b boundary. This was uncovered on the Cobalt 100 platform. SError Interrupt on CPU26, code 0xbe000011 -- SError CPU: 26 PID: 1510 Comm: systemd-udevd Not tainted 5.15.2.1-13 #1 Hardware name: MICROSOFT CORPORATION, BIOS MICROSOFT CORPORATION pstate: 62400009 (nZCv daif +PAN -UAO +TCO -DIT -SSBS BTYPE=--) pc : cppc_get_perf_caps+0xec/0x410 lr : cppc_get_perf_caps+0xe8/0x410 sp : ffff8000155ab730 x29: ffff8000155ab730 x28: ffff0080139d0038 x27: ffff0080139d0078 x26: 0000000000000000 x25: ffff0080139d0058 x24: 00000000ffffffff x23: ffff0080139d0298 x22: ffff0080139d0278 x21: 0000000000000000 x20: ffff00802b251910 x19: ffff0080139d0000 x18: ffffffffffffffff x17: 0000000000000000 x16: ffffdc7e111bad04 x15: ffff00802b251008 x14: ffffffffffffffff x13: ffff013f1fd63300 x12: 0000000000000006 x11: ffffdc7e128f4420 x10: 0000000000000000 x9 : ffffdc7e111badec x8 : ffff00802b251980 x7 : 0000000000000000 x6 : ffff0080139d0028 x5 : 0000000000000000 x4 : ffff0080139d0018 x3 : 00000000ffffffff x2 : 0000000000000008 x1 : ffff8000155ab7a0 x0 : 0000000000000000 Kernel panic - not syncing: Asynchronous SError Interrupt CPU: 26 PID: 1510 Comm: systemd-udevd Not tainted 5.15.2.1-13 #1 Hardware name: MICROSOFT CORPORATION, BIOS MICROSOFT CORPORATION Call trace: dump_backtrace+0x0/0x1e0 show_stack+0x24/0x30 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 panic+0x16c/0x384 add_taint+0x0/0xc0 arm64_serror_panic+0x7c/0x90 arm64_is_fatal_ras_serror+0x34/0xa4 do_serror+0x50/0x6c el1h_64_error_handler+0x40/0x74 el1h_64_error+0x7c/0x80 cppc_get_perf_caps+0xec/0x410 cppc_cpufreq_cpu_init+0x74/0x400 [cppc_cpufreq] cpufreq_online+0x2dc/0xa30 cpufreq_add_dev+0xc0/0xd4 subsys_interface_register+0x134/0x14c cpufreq_register_driver+0x1b0/0x354 cppc_cpufreq_init+0x1a8/0x1000 [cppc_cpufreq] do_one_initcall+0x50/0x250 do_init_module+0x60/0x27c load_module+0x2300/0x2570 __do_sys_finit_module+0xa8/0x114 __arm64_sys_finit_module+0x2c/0x3c invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x180/0x1a0 do_el0_svc+0x84/0xa0 el0_svc+0x2c/0xc0 el0t_64_sync_handler+0xa4/0x12c el0t_64_sync+0x1a4/0x1a8 Instead, use access_width to determine the size and use the offset and width to shift and mask the bits to read/write out. Make sure to add a check for system memory since pcc redefines the access_width to subspace id. If access_width is not set, then fall back to using bit_width. Signed-off-by: Jarred White Reviewed-by: Easwar Hariharan Cc: 5.15+ # 5.15+ [ rjw: Subject and changelog edits, comment adjustments ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 093675b1a1ff..c123fdbca693 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -163,6 +163,13 @@ show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); +/* Check for valid access_width, otherwise, fallback to using bit_width */ +#define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) + +/* Shift and apply the mask for CPC reads/writes */ +#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \ + GENMASK(((reg)->bit_width), 0))) + static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -776,6 +783,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { if (gas_t->address) { void __iomem *addr; + size_t access_width; if (!osc_cpc_flexible_adr_space_confirmed) { pr_debug("Flexible address space capability not supported\n"); @@ -783,7 +791,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) goto out_free; } - addr = ioremap(gas_t->address, gas_t->bit_width/8); + access_width = GET_BIT_WIDTH(gas_t) / 8; + addr = ioremap(gas_t->address, access_width); if (!addr) goto out_free; cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr; @@ -979,6 +988,7 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) { void __iomem *vaddr = NULL; + int size; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; @@ -990,7 +1000,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = 0; if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = 8 << (reg->access_width - 1); + u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; @@ -1014,7 +1024,9 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) return acpi_os_read_memory((acpi_physical_address)reg->address, val, reg->bit_width); - switch (reg->bit_width) { + size = GET_BIT_WIDTH(reg); + + switch (size) { case 8: *val = readb_relaxed(vaddr); break; @@ -1033,18 +1045,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) return -EFAULT; } + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + *val = MASK_VAL(reg, *val); + return 0; } static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) { int ret_val = 0; + int size; void __iomem *vaddr = NULL; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = 8 << (reg->access_width - 1); + u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, @@ -1066,7 +1082,12 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) return acpi_os_write_memory((acpi_physical_address)reg->address, val, reg->bit_width); - switch (reg->bit_width) { + size = GET_BIT_WIDTH(reg); + + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + val = MASK_VAL(reg, val); + + switch (size) { case 8: writeb_relaxed(val, vaddr); break; From 0f708a7e0abdceaaa35dc1844020e12a1153de53 Mon Sep 17 00:00:00 2001 From: Jarred White Date: Mon, 8 Apr 2024 22:23:09 -0700 Subject: [PATCH 514/553] ACPI: CPPC: Fix bit_offset shift in MASK_VAL() macro commit 05d92ee782eeb7b939bdd0189e6efcab9195bf95 upstream. Commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") neglected to properly wrap the bit_offset shift when it comes to applying the mask. This may cause incorrect values to be read and may cause the cpufreq module not be loaded. [ 11.059751] cpu_capacity: CPU0 missing/invalid highest performance. [ 11.066005] cpu_capacity: partial information: fallback to 1024 for all CPUs Also, corrected the bitmask generation in GENMASK (extra bit being added). Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Jarred White Cc: 5.15+ # 5.15+ Reviewed-by: Vanshidhar Konda Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index c123fdbca693..f153f7d3e6b7 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -167,8 +167,8 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); #define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) /* Shift and apply the mask for CPC reads/writes */ -#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \ - GENMASK(((reg)->bit_width), 0))) +#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \ + GENMASK(((reg)->bit_width) - 1, 0)) static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) From ba234a54ee56e5b23e763551e28fc220072d19c8 Mon Sep 17 00:00:00 2001 From: Vanshidhar Konda Date: Thu, 11 Apr 2024 16:18:44 -0700 Subject: [PATCH 515/553] ACPI: CPPC: Fix access width used for PCC registers commit f489c948028b69cea235d9c0de1cc10eeb26a172 upstream. commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") modified cpc_read()/cpc_write() to use access_width to read CPC registers. However, for PCC registers the access width field in the ACPI register macro specifies the PCC subspace ID. For non-zero PCC subspace ID it is incorrectly treated as access width. This causes errors when reading from PCC registers in the CPPC driver. For PCC registers, base the size of read/write on the bit width field. The debug message in cpc_read()/cpc_write() is updated to print relevant information for the address space type used to read the register. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Vanshidhar Konda Tested-by: Jarred White Reviewed-by: Jarred White Reviewed-by: Easwar Hariharan Cc: 5.15+ # 5.15+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index f153f7d3e6b7..49339f37d940 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -998,14 +998,14 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; + size = GET_BIT_WIDTH(reg); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; status = acpi_os_read_port((acpi_io_address)reg->address, - &val_u32, width); + &val_u32, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); @@ -1014,17 +1014,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = val_u32; return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. + */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_read_ffh(cpu, reg, val); else return acpi_os_read_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); switch (size) { case 8: @@ -1040,8 +1045,13 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = readq_relaxed(vaddr); break; default: - pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", + size, pcc_ss_id); + } return -EFAULT; } @@ -1059,12 +1069,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; + size = GET_BIT_WIDTH(reg); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, - (u32)val, width); + (u32)val, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to write SystemIO port %llx\n", reg->address); @@ -1072,17 +1083,22 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) } return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. + */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_write_ffh(cpu, reg, val); else return acpi_os_write_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) val = MASK_VAL(reg, val); @@ -1101,8 +1117,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) writeq_relaxed(val, vaddr); break; default: - pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; break; } From 8e2c583c268003ce7241bb7cd839954273de4a5b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:03 -0700 Subject: [PATCH 516/553] ethernet: Add helper for assigning packet type when dest address does not match device address commit 6e159fd653d7ebf6290358e0330a0cb8a75cf73b upstream. Enable reuse of logic in eth_type_trans for determining packet type. Suggested-by: Sabrina Dubroca Cc: stable@vger.kernel.org Signed-off-by: Rahul Rameshbabu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-3-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/etherdevice.h | 25 +++++++++++++++++++++++++ net/ethernet/eth.c | 12 +----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index a541f0c4f146..d7eef2158667 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -593,6 +593,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, eth_hw_addr_set(dev, addr); } +/** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. + */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index e02daa74e833..5ba7b460cbf7 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) { - if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { - if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } else { - skb->pkt_type = PACKET_OTHERHOST; - } - } + eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field From c35fc180715d4756e231da326e5295fe8541ddeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCnster?= Date: Wed, 24 Apr 2024 15:51:52 +0200 Subject: [PATCH 517/553] net: b44: set pause params only when interface is up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e3eb7dd47bd4806f00e104eb6da092c435f9fb21 upstream. b44_free_rings() accesses b44::rx_buffers (and ::tx_buffers) unconditionally, but b44::rx_buffers is only valid when the device is up (they get allocated in b44_open(), and deallocated again in b44_close()), any other time these are just a NULL pointers. So if you try to change the pause params while the network interface is disabled/administratively down, everything explodes (which likely netifd tries to do). Link: https://github.com/openwrt/openwrt/issues/13789 Fixes: 1da177e4c3f4 (Linux-2.6.12-rc2) Cc: stable@vger.kernel.org Reported-by: Peter Münster Suggested-by: Jonas Gorski Signed-off-by: Vaclav Svoboda Tested-by: Peter Münster Reviewed-by: Andrew Lunn Signed-off-by: Peter Münster Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/87y192oolj.fsf@a16n.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/b44.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 7f876721596c..5b6209f5a801 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2033,12 +2033,14 @@ static int b44_set_pauseparam(struct net_device *dev, bp->flags |= B44_FLAG_TX_PAUSE; else bp->flags &= ~B44_FLAG_TX_PAUSE; - if (bp->flags & B44_FLAG_PAUSE_AUTO) { - b44_halt(bp); - b44_init_rings(bp); - b44_init_hw(bp, B44_FULL_RESET); - } else { - __b44_set_flow_ctrl(bp, bp->flags); + if (netif_running(dev)) { + if (bp->flags & B44_FLAG_PAUSE_AUTO) { + b44_halt(bp); + b44_init_rings(bp); + b44_init_hw(bp, B44_FULL_RESET); + } else { + __b44_set_flow_ctrl(bp, bp->flags); + } } spin_unlock_irq(&bp->lock); From d0205d6e0a5a3bfa25225b027bca0f70cbd7fdcf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 18 Apr 2024 16:11:33 +0200 Subject: [PATCH 518/553] stackdepot: respect __GFP_NOLOCKDEP allocation flag commit 6fe60465e1d53ea321ee909be26d97529e8f746c upstream. If stack_depot_save_flags() allocates memory it always drops __GFP_NOLOCKDEP flag. So when KASAN tries to track __GFP_NOLOCKDEP allocation we may end up with lockdep splat like bellow: ====================================================== WARNING: possible circular locking dependency detected 6.9.0-rc3+ #49 Not tainted ------------------------------------------------------ kswapd0/149 is trying to acquire lock: ffff88811346a920 (&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 [xfs] but task is already holding lock: ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x5d9/0xad0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x7da/0x1030 lock_acquire+0x15d/0x400 fs_reclaim_acquire+0xb5/0x100 prepare_alloc_pages.constprop.0+0xc5/0x230 __alloc_pages+0x12a/0x3f0 alloc_pages_mpol+0x175/0x340 stack_depot_save_flags+0x4c5/0x510 kasan_save_stack+0x30/0x40 kasan_save_track+0x10/0x30 __kasan_slab_alloc+0x83/0x90 kmem_cache_alloc+0x15e/0x4a0 __alloc_object+0x35/0x370 __create_object+0x22/0x90 __kmalloc_node_track_caller+0x477/0x5b0 krealloc+0x5f/0x110 xfs_iext_insert_raw+0x4b2/0x6e0 [xfs] xfs_iext_insert+0x2e/0x130 [xfs] xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs] xfs_btree_visit_block+0xfb/0x290 [xfs] xfs_btree_visit_blocks+0x215/0x2c0 [xfs] xfs_iread_extents+0x1a2/0x2e0 [xfs] xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs] iomap_iter+0x1d1/0x2d0 iomap_file_buffered_write+0x120/0x1a0 xfs_file_buffered_write+0x128/0x4b0 [xfs] vfs_write+0x675/0x890 ksys_write+0xc3/0x160 do_syscall_64+0x94/0x170 entry_SYSCALL_64_after_hwframe+0x71/0x79 Always preserve __GFP_NOLOCKDEP to fix this. Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Andrey Ryabinin Reported-by: Xiubo Li Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/ Reported-by: Damien Le Moal Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/ Suggested-by: Dave Chinner Tested-by: Xiubo Li Cc: Christoph Hellwig Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/stackdepot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 79e894cf8406..77eb944b7a6b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -466,10 +466,10 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic - * contexts and I/O. + * contexts, I/O, nolockdep. */ alloc_flags &= ~GFP_ZONEMASK; - alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); alloc_flags |= __GFP_NOWARN; page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); if (page) From e3f0519da4d77e339314a6477b8bf89836812765 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 23 Apr 2024 13:50:53 +0200 Subject: [PATCH 519/553] fbdev: fix incorrect address computation in deferred IO commit 78d9161d2bcd442d93d917339297ffa057dbee8c upstream. With deferred IO enabled, a page fault happens when data is written to the framebuffer device. Then driver determines which page is being updated by calculating the offset of the written virtual address within the virtual memory area, and uses this offset to get the updated page within the internal buffer. This page is later copied to hardware (thus the name "deferred IO"). This offset calculation is only correct if the virtual memory area is mapped to the beginning of the internal buffer. Otherwise this is wrong. For example, if users do: mmap(ptr, 4096, PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0xff000); Then the virtual memory area will mapped at offset 0xff000 within the internal buffer. This offset 0xff000 is not accounted for, and wrong page is updated. Correct the calculation by using vmf->pgoff instead. With this change, the variable "offset" will no longer hold the exact offset value, but it is rounded down to multiples of PAGE_SIZE. But this is still correct, because this variable is only used to calculate the page offset. Reported-by: Harshit Mogalapalli Closes: https://lore.kernel.org/linux-fbdev/271372d6-e665-4e7f-b088-dee5f4ab341a@oracle.com Fixes: 56c134f7f1b5 ("fbdev: Track deferred-I/O pages in pageref struct") Cc: Signed-off-by: Nam Cao Reviewed-by: Thomas Zimmermann Tested-by: Harshit Mogalapalli Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240423115053.4490-1-namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fb_defio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 49883c8012e6..3b376345d4d4 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -200,7 +200,7 @@ err_mutex_unlock: */ static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf) { - unsigned long offset = vmf->address - vmf->vma->vm_start; + unsigned long offset = vmf->pgoff << PAGE_SHIFT; struct page *page = vmf->page; file_update_time(vmf->vma->vm_file); From 4ebf1ff60e53ebbb0985d3ead8a454d6a8fad8cd Mon Sep 17 00:00:00 2001 From: Yick Xie Date: Fri, 19 Apr 2024 01:06:10 +0800 Subject: [PATCH 520/553] udp: preserve the connected status if only UDP cmsg commit 680d11f6e5427b6af1321932286722d24a8b16c1 upstream. If "udp_cmsg_send()" returned 0 (i.e. only UDP cmsg), "connected" should not be set to 0. Otherwise it stops the connected socket from using the cached route. Fixes: 2e8de8576343 ("udp: add gso segment cmsg") Signed-off-by: Yick Xie Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240418170610.867084-1-yick.xie@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 5 +++-- net/ipv6/udp.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a78c78186c3..39fae7581d35 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,16 +1141,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); - if (err > 0) + if (err > 0) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); + connected = 0; + } if (unlikely(err < 0)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; - connected = 0; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1775e9b9b85a..504ea27d08fb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1493,9 +1493,11 @@ do_udp_sendmsg: ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); - if (err > 0) + if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); + connected = false; + } if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1507,7 +1509,6 @@ do_udp_sendmsg: } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; - connected = false; } if (!opt) { opt = txopt_get(np); From ad371d69a6785f886b7240bad2a7a993b7e34040 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2024 16:30:04 +0200 Subject: [PATCH 521/553] mtd: diskonchip: work around ubsan link failure commit 21c9fb611c25d5cd038f6fe485232e7884bb0b3d upstream. I ran into a randconfig build failure with UBSAN using gcc-13.2: arm-linux-gnueabi-ld: error: unplaced orphan section `.bss..Lubsan_data31' from `drivers/mtd/nand/raw/diskonchip.o' I'm not entirely sure what is going on here, but I suspect this has something to do with the check for the end of the doc_locations[] array that contains an (unsigned long)0xffffffff element, which is compared against the signed (int)0xffffffff. If this is the case, we should get a runtime check for undefined behavior, but we instead get an unexpected build-time error. I would have expected this to work fine on 32-bit architectures despite the signed integer overflow, though on 64-bit architectures this likely won't ever work. Changing the contition to instead check for the size of the array makes the code safe everywhere and avoids the ubsan check that leads to the link error. The loop code goes back to before 2.6.12. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240405143015.717429-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/diskonchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 5d2ddb037a9a..2068025d5639 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -53,7 +53,7 @@ static unsigned long doc_locations[] __initdata = { 0xe8000, 0xea000, 0xec000, 0xee000, #endif #endif - 0xffffffff }; +}; static struct mtd_info *doclist = NULL; @@ -1552,7 +1552,7 @@ static int __init init_nanddoc(void) if (ret < 0) return ret; } else { - for (i = 0; (doc_locations[i] != 0xffffffff); i++) { + for (i = 0; i < ARRAY_SIZE(doc_locations); i++) { doc_probe(doc_locations[i]); } } From 2bd852307fdcefe474ff59730aec3f397f1585ae Mon Sep 17 00:00:00 2001 From: Aswin Unnikrishnan Date: Fri, 19 Apr 2024 21:50:13 +0000 Subject: [PATCH 522/553] rust: remove `params` from `module` macro example commit 19843452dca40e28d6d3f4793d998b681d505c7f upstream. Remove argument `params` from the `module` macro example, because the macro does not currently support module parameters since it was not sent with the initial merge. Signed-off-by: Aswin Unnikrishnan Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org Fixes: 1fbde52bde73 ("rust: add `macros` crate") Link: https://lore.kernel.org/r/20240419215015.157258-1-aswinunni01@gmail.com [ Reworded slightly. ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/macros/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 91764bfb1f89..f2efa86a747a 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -27,18 +27,6 @@ use proc_macro::TokenStream; /// author: b"Rust for Linux Contributors", /// description: b"My very own kernel module!", /// license: b"GPL", -/// params: { -/// my_i32: i32 { -/// default: 42, -/// permissions: 0o000, -/// description: b"Example of i32", -/// }, -/// writeable_i32: i32 { -/// default: 42, -/// permissions: 0o644, -/// description: b"Example of i32", -/// }, -/// }, /// } /// /// struct MyModule; From a8e8c79ed2eb195e04b8119b7d83c4ddcccf9739 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 24 Apr 2024 11:20:35 +0300 Subject: [PATCH 523/553] x86/tdx: Preserve shared bit on mprotect() commit a0a8d15a798be4b8f20aca2ba91bf6b688c6a640 upstream. The TDX guest platform takes one bit from the physical address to indicate if the page is shared (accessible by VMM). This bit is not part of the physical_mask and is not preserved during mprotect(). As a result, the 'shared' bit is lost during mprotect() on shared mappings. _COMMON_PAGE_CHG_MASK specifies which PTE bits need to be preserved during modification. AMD includes 'sme_me_mask' in the define to preserve the 'encrypt' bit. To cover both Intel and AMD cases, include 'cc_mask' in _COMMON_PAGE_CHG_MASK instead of 'sme_me_mask'. Reported-and-tested-by: Chris Oo Fixes: 41394e33f3a0 ("x86/tdx: Extend the confidential computing API to support TDX guests") Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Rick Edgecombe Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Tom Lendacky Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240424082035.4092071-1-kirill.shutemov%40linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/coco.h | 5 ++++- arch/x86/include/asm/pgtable_types.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 1f97d00ad858..100a752c33bb 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -13,9 +13,10 @@ enum cc_vendor { }; extern enum cc_vendor cc_vendor; -extern u64 cc_mask; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern u64 cc_mask; + static inline void cc_set_mask(u64 mask) { RIP_REL_REF(cc_mask) = mask; @@ -25,6 +26,8 @@ u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); void cc_random_init(void); #else +static const u64 cc_mask = 0; + static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f6116b66f289..f0b9b37c4609 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -127,7 +127,7 @@ */ #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_CC | \ _PAGE_UFFD_WP) #define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) @@ -153,6 +153,7 @@ enum page_cache_mode { }; #endif +#define _PAGE_CC (_AT(pteval_t, cc_mask)) #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) From 56bce3fcf8471f93bfb00d4d9e08fa3d18c82189 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2024 14:21:07 +0100 Subject: [PATCH 524/553] dmaengine: owl: fix register access functions [ Upstream commit 43c633ef93a5d293c96ebcedb40130df13128428 ] When building with 'make W=1', clang notices that the computed register values are never actually written back but instead the wrong variable is set: drivers/dma/owl-dma.c:244:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 244 | u32 regval; | ^ drivers/dma/owl-dma.c:268:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 268 | u32 regval; | ^ Change these to what was most likely intended. Fixes: 47e20577c24d ("dmaengine: Add Actions Semi Owl family S900 DMA driver") Signed-off-by: Arnd Bergmann Reviewed-by: Peter Korsgaard Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240322132116.906475-1-arnd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/owl-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index b6e0ac8314e5..0819f19c87cc 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -249,7 +249,7 @@ static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, else regval &= ~val; - writel(val, pchan->base + reg); + writel(regval, pchan->base + reg); } static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data) @@ -273,7 +273,7 @@ static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state) else regval &= ~val; - writel(val, od->base + reg); + writel(regval, od->base + reg); } static void dma_writel(struct owl_dma *od, u32 reg, u32 data) From 33d8e3e5f36468c30b610e810169dc2724b06013 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Fri, 15 Mar 2024 18:14:11 +0530 Subject: [PATCH 525/553] dmaengine: tegra186: Fix residual calculation [ Upstream commit 30f0ced9971b2d8c8c24ae75786f9079489a012d ] The existing residual calculation returns an incorrect value when bytes_xfer == bytes_req. This scenario occurs particularly with drivers like UART where DMA is scheduled for maximum number of bytes and is terminated when the bytes inflow stops. At higher baud rates, it could request the tx_status while there is no bytes left to transfer. This will lead to incorrect residual being set. Hence return residual as '0' when bytes transferred equals to the bytes requested. Fixes: ee17028009d4 ("dmaengine: tegra: Add tegra gpcdma driver") Signed-off-by: Akhil R Reviewed-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240315124411.17582-1-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/tegra186-gpc-dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index 75af3488a3ba..e70b7c41dcab 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -742,6 +742,9 @@ static int tegra_dma_get_residual(struct tegra_dma_channel *tdc) bytes_xfer = dma_desc->bytes_xfer + sg_req[dma_desc->sg_idx].len - (wcount * 4); + if (dma_desc->bytes_req == bytes_xfer) + return 0; + residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req); return residual; From 2203a447fd1e71a9b92590db7fc942a7df92b031 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Mar 2024 14:04:21 +0200 Subject: [PATCH 526/553] idma64: Don't try to serve interrupts when device is powered off [ Upstream commit 9140ce47872bfd89fca888c2f992faa51d20c2bc ] When iDMA 64-bit device is powered off, the IRQ status register is all 1:s. This is never happen in real case and signalling that the device is simply powered off. Don't try to serve interrupts that are not ours. Fixes: 667dfed98615 ("dmaengine: add a driver for Intel integrated DMA 64-bit") Reported-by: Heiner Kallweit Closes: https://lore.kernel.org/r/700bbb84-90e1-4505-8ff0-3f17ea8bc631@gmail.com Tested-by: Heiner Kallweit Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240321120453.1360138-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idma64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index f4c07ad3be15..af8777a1ec2e 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -167,6 +167,10 @@ static irqreturn_t idma64_irq(int irq, void *dev) u32 status_err; unsigned short i; + /* Since IRQ may be shared, check if DMA controller is powered on */ + if (status == GENMASK(31, 0)) + return IRQ_NONE; + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); /* Check if we have any interrupt from the DMA controller */ From 976df695f579bbb2914114b4e9974fe4ed1eb813 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:30 +0300 Subject: [PATCH 527/553] phy: marvell: a3700-comphy: Fix out of bounds read [ Upstream commit e4308bc22b9d46cf33165c9dfaeebcf29cd56f04 ] There is an out of bounds read access of 'gbe_phy_init_fix[fix_idx].addr' every iteration after 'fix_idx' reaches 'ARRAY_SIZE(gbe_phy_init_fix)'. Make sure 'gbe_phy_init[addr]' is used when all elements of 'gbe_phy_init_fix' array are handled. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20240321164734.49273-1-m.kobuk@ispras.ru Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index d641b345afa3..392a8ae1bc66 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -610,11 +610,12 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, * comparison to 3.125 Gbps values. These register values are * stored in "gbe_phy_init_fix" array. */ - if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) { + if (!is_1gbps && + fix_idx < ARRAY_SIZE(gbe_phy_init_fix) && + gbe_phy_init_fix[fix_idx].addr == addr) { /* Use new value */ val = gbe_phy_init_fix[fix_idx].value; - if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix)) - fix_idx++; + fix_idx++; } else { val = gbe_phy_init[addr]; } From d6a6bacd0a118db9a02d1f8e54182a2ec1b13e36 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:31 +0300 Subject: [PATCH 528/553] phy: marvell: a3700-comphy: Fix hardcoded array size [ Upstream commit 627207703b73615653eea5ab7a841d5b478d961e ] Replace hardcoded 'gbe_phy_init' array size by explicit one. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Link: https://lore.kernel.org/r/20240321164734.49273-2-m.kobuk@ispras.ru Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 392a8ae1bc66..251e1aedd4a6 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -602,7 +602,7 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, u16 val; fix_idx = 0; - for (addr = 0; addr < 512; addr++) { + for (addr = 0; addr < ARRAY_SIZE(gbe_phy_init); addr++) { /* * All PHY register values are defined in full for 3.125Gbps * SERDES speed. The values required for 1.25 Gbps are almost From ed4b981b1d8671b2aabbf8cf128ba8829471c6b6 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:01 +0800 Subject: [PATCH 529/553] phy: freescale: imx8m-pcie: Refine i.MX8MM PCIe PHY driver [ Upstream commit ca679c49c4463595499a053ba94328acb574fffa ] To make it more flexible and easy to expand. Refine i.MX8MM PCIe PHY driver. - Use gpr compatible string to avoid the codes duplications when add another platform PCIe PHY support. - Re-arrange the codes to let it more flexible and easy to expand. No functional change. Re-arrange the TX tuning, since internal registers can be wrote through APB interface before assertion of CMN_RST. Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/1665625622-20551-4-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul Stable-dep-of: 3a161017f1de ("phy: freescale: imx8m-pcie: fix pcie link-up instability") Signed-off-by: Sasha Levin --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 106 +++++++++++++-------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index c93286483b42..f1476936b8d9 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,15 @@ #define IMX8MM_GPR_PCIE_SSC_EN BIT(16) #define IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE BIT(9) +enum imx8_pcie_phy_type { + IMX8MM, +}; + +struct imx8_pcie_phy_drvdata { + const char *gpr; + enum imx8_pcie_phy_type variant; +}; + struct imx8_pcie_phy { void __iomem *base; struct clk *clk; @@ -57,6 +67,7 @@ struct imx8_pcie_phy { u32 tx_deemph_gen1; u32 tx_deemph_gen2; bool clkreq_unused; + const struct imx8_pcie_phy_drvdata *drvdata; }; static int imx8_pcie_phy_power_on(struct phy *phy) @@ -68,31 +79,17 @@ static int imx8_pcie_phy_power_on(struct phy *phy) reset_control_assert(imx8_phy->reset); pad_mode = imx8_phy->refclk_pad_mode; - /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, - imx8_phy->clkreq_unused ? - 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN, - IMX8MM_GPR_PCIE_AUX_EN); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_POWER_OFF, 0); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_SSC_EN, 0); - - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_REF_CLK_SEL, - pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? - IMX8MM_GPR_PCIE_REF_CLK_EXT : - IMX8MM_GPR_PCIE_REF_CLK_PLL); - usleep_range(100, 200); - - /* Do the PHY common block reset */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_CMN_RST, - IMX8MM_GPR_PCIE_CMN_RST); - usleep_range(200, 500); + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + /* Tune PHY de-emphasis setting to pass PCIe compliance. */ + if (imx8_phy->tx_deemph_gen1) + writel(imx8_phy->tx_deemph_gen1, + imx8_phy->base + PCIE_PHY_TRSV_REG5); + if (imx8_phy->tx_deemph_gen2) + writel(imx8_phy->tx_deemph_gen2, + imx8_phy->base + PCIE_PHY_TRSV_REG6); + break; + } if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT || pad_mode == IMX8_PCIE_REFCLK_PAD_UNUSED) { @@ -120,15 +117,37 @@ static int imx8_pcie_phy_power_on(struct phy *phy) imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG065); } - /* Tune PHY de-emphasis setting to pass PCIe compliance. */ - if (imx8_phy->tx_deemph_gen1) - writel(imx8_phy->tx_deemph_gen1, - imx8_phy->base + PCIE_PHY_TRSV_REG5); - if (imx8_phy->tx_deemph_gen2) - writel(imx8_phy->tx_deemph_gen2, - imx8_phy->base + PCIE_PHY_TRSV_REG6); + /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, + imx8_phy->clkreq_unused ? + 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN, + IMX8MM_GPR_PCIE_AUX_EN); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_POWER_OFF, 0); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_SSC_EN, 0); - reset_control_deassert(imx8_phy->reset); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_REF_CLK_SEL, + pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? + IMX8MM_GPR_PCIE_REF_CLK_EXT : + IMX8MM_GPR_PCIE_REF_CLK_PLL); + usleep_range(100, 200); + + /* Do the PHY common block reset */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_CMN_RST, + IMX8MM_GPR_PCIE_CMN_RST); + + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + reset_control_deassert(imx8_phy->reset); + usleep_range(200, 500); + break; + } /* Polling to check the phy is ready or not. */ ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG75, @@ -160,6 +179,17 @@ static const struct phy_ops imx8_pcie_phy_ops = { .owner = THIS_MODULE, }; +static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = { + .gpr = "fsl,imx8mm-iomuxc-gpr", + .variant = IMX8MM, +}; + +static const struct of_device_id imx8_pcie_phy_of_match[] = { + {.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, }, + { }, +}; +MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); + static int imx8_pcie_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -172,6 +202,8 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) if (!imx8_phy) return -ENOMEM; + imx8_phy->drvdata = of_device_get_match_data(dev); + /* get PHY refclk pad mode */ of_property_read_u32(np, "fsl,refclk-pad-mode", &imx8_phy->refclk_pad_mode); @@ -197,7 +229,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) /* Grab GPR config register range */ imx8_phy->iomuxc_gpr = - syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + syscon_regmap_lookup_by_compatible(imx8_phy->drvdata->gpr); if (IS_ERR(imx8_phy->iomuxc_gpr)) { dev_err(dev, "unable to find iomuxc registers\n"); return PTR_ERR(imx8_phy->iomuxc_gpr); @@ -225,12 +257,6 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } -static const struct of_device_id imx8_pcie_phy_of_match[] = { - {.compatible = "fsl,imx8mm-pcie-phy",}, - { }, -}; -MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); - static struct platform_driver imx8_pcie_phy_driver = { .probe = imx8_pcie_phy_probe, .driver = { From 4723dfe76de0c91ab70bda148b5c2af664a46c80 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 22 Mar 2024 14:06:32 +0100 Subject: [PATCH 530/553] phy: freescale: imx8m-pcie: fix pcie link-up instability [ Upstream commit 3a161017f1de55cc48be81f6156004c151f32677 ] Leaving AUX_PLL_REFCLK_SEL at its reset default of AUX_IN (PLL clock) proves to be more stable on the i.MX 8M Mini. Fixes: 1aa97b002258 ("phy: freescale: pcie: Initialize the imx8 pcie standalone phy driver") Signed-off-by: Marcel Ziswiler Reviewed-by: Richard Zhu Link: https://lore.kernel.org/r/20240322130646.1016630-2-marcel@ziswiler.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index f1476936b8d9..211ce84d980f 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -108,8 +108,10 @@ static int imx8_pcie_phy_power_on(struct phy *phy) /* Source clock from SoC internal PLL */ writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062); - writel(AUX_PLL_REFCLK_SEL_SYS_PLL, - imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + if (imx8_phy->drvdata->variant != IMX8MM) { + writel(AUX_PLL_REFCLK_SEL_SYS_PLL, + imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + } val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM; writel(val | ANA_AUX_RX_TERM_GND_EN, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064); From 199895b8b6f7a24dd4c40ef3cc678d6f41a8852a Mon Sep 17 00:00:00 2001 From: Michal Tomek Date: Thu, 4 Apr 2024 19:11:26 +0200 Subject: [PATCH 531/553] phy: rockchip-snps-pcie3: fix bifurcation on rk3588 [ Upstream commit f8020dfb311d2b6cf657668792aaa5fa8863a7dd ] So far all RK3588 boards use fully aggregated PCIe. CM3588 is one of the few boards using this feature and apparently it is broken. The PHY offers the following mapping options: port 0 lane 0 - always mapped to controller 0 (4L) port 0 lane 1 - to controller 0 or 2 (1L0) port 1 lane 0 - to controller 0 or 1 (2L) port 1 lane 1 - to controller 0, 1 or 3 (1L1) The data-lanes DT property maps these as follows: 0 = no controller (unsupported by the HW) 1 = 4L 2 = 2L 3 = 1L0 4 = 1L1 That allows the following configurations with first column being the mainline data-lane mapping, second column being the downstream name, third column being PCIE3PHY_GRF_CMN_CON0 and PHP_GRF_PCIESEL register values and final column being the user visible lane setup: <1 1 1 1> = AGGREG = [4 0] = x4 (aggregation) <1 1 2 2> = NANBNB = [0 0] = x2 x2 (no bif.) <1 3 2 2> = NANBBI = [1 1] = x2 x1x1 (bif. of port 0) <1 1 2 4> = NABINB = [2 2] = x1x1 x2 (bif. of port 1) <1 3 2 4> = NABIBI = [3 3] = x1x1 x1x1 (bif. of both ports) The driver currently does not program PHP_GRF_PCIESEL correctly, which is fixed by this patch. As a side-effect the new logic is much simpler than the old logic. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Michal Tomek Signed-off-by: Sebastian Reichel Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-1-9907136eeafd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- .../phy/rockchip/phy-rockchip-snps-pcie3.c | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index 1d355b32ba55..4f32a2dc2458 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -131,7 +131,7 @@ static const struct rockchip_p3phy_ops rk3568_ops = { static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) { u32 reg = 0; - u8 mode = 0; + u8 mode = RK3588_LANE_AGGREGATION; /* default */ int ret; /* Deassert PCIe PMA output clamp mode */ @@ -139,28 +139,20 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) /* Set bifurcation if needed */ for (int i = 0; i < priv->num_lanes; i++) { - if (!priv->lanes[i]) - mode |= (BIT(i) << 3); - if (priv->lanes[i] > 1) - mode |= (BIT(i) >> 1); - } - - if (!mode) - reg = RK3588_LANE_AGGREGATION; - else { - if (mode & (BIT(0) | BIT(1))) - reg |= RK3588_BIFURCATION_LANE_0_1; - - if (mode & (BIT(2) | BIT(3))) - reg |= RK3588_BIFURCATION_LANE_2_3; + mode &= ~RK3588_LANE_AGGREGATION; + if (priv->lanes[i] == 3) + mode |= RK3588_BIFURCATION_LANE_0_1; + if (priv->lanes[i] == 4) + mode |= RK3588_BIFURCATION_LANE_2_3; } + reg = mode; regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = (mode & (BIT(6) | BIT(7))) >> 6; + reg = mode & 3; if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, (reg << 16) | reg); From e71d5ec7c04362cdcffd51027b97f35cace4b616 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 Apr 2024 19:11:27 +0200 Subject: [PATCH 532/553] phy: rockchip-snps-pcie3: fix clearing PHP_GRF_PCIESEL_CON bits [ Upstream commit 55491a5fa163bf15158f34f3650b3985f25622b9 ] Currently the PCIe v3 PHY driver only sets the pcie1ln_sel bits, but does not clear them because of an incorrect write mask. This fixes up the issue by using a newly introduced constant for the write mask. While at it also introduces a proper GENMASK based constant for the PCIE30_PHY_MODE. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-2-9907136eeafd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/rockchip/phy-rockchip-snps-pcie3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index 4f32a2dc2458..c6aa6bc69e90 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -39,6 +39,8 @@ #define RK3588_BIFURCATION_LANE_0_1 BIT(0) #define RK3588_BIFURCATION_LANE_2_3 BIT(1) #define RK3588_LANE_AGGREGATION BIT(2) +#define RK3588_PCIE1LN_SEL_EN (GENMASK(1, 0) << 16) +#define RK3588_PCIE30_PHY_MODE_EN (GENMASK(2, 0) << 16) struct rockchip_p3phy_ops; @@ -148,14 +150,15 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) } reg = mode; - regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); + regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, + RK3588_PCIE30_PHY_MODE_EN | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = mode & 3; + reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3); if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, - (reg << 16) | reg); + RK3588_PCIE1LN_SEL_EN | reg); } reset_control_deassert(priv->p30phy); From 8bf574183282d219cfa991f7df37aad491d74c11 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 8 Mar 2024 16:00:32 -0500 Subject: [PATCH 533/553] dma: xilinx_dpdma: Fix locking [ Upstream commit 244296cc3a155199a8b080d19e645d7d49081a38 ] There are several places where either chan->lock or chan->vchan.lock was not held. Add appropriate locking. This fixes lockdep warnings like [ 31.077578] ------------[ cut here ]------------ [ 31.077831] WARNING: CPU: 2 PID: 40 at drivers/dma/xilinx/xilinx_dpdma.c:834 xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.077953] Modules linked in: [ 31.078019] CPU: 2 PID: 40 Comm: kworker/u12:1 Not tainted 6.6.20+ #98 [ 31.078102] Hardware name: xlnx,zynqmp (DT) [ 31.078169] Workqueue: events_unbound deferred_probe_work_func [ 31.078272] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 31.078377] pc : xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.078473] lr : xilinx_dpdma_chan_queue_transfer+0x270/0x5e0 [ 31.078550] sp : ffffffc083bb2e10 [ 31.078590] x29: ffffffc083bb2e10 x28: 0000000000000000 x27: ffffff880165a168 [ 31.078754] x26: ffffff880164e920 x25: ffffff880164eab8 x24: ffffff880164d480 [ 31.078920] x23: ffffff880165a148 x22: ffffff880164e988 x21: 0000000000000000 [ 31.079132] x20: ffffffc082aa3000 x19: ffffff880164e880 x18: 0000000000000000 [ 31.079295] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 31.079453] x14: 0000000000000000 x13: ffffff8802263dc0 x12: 0000000000000001 [ 31.079613] x11: 0001ffc083bb2e34 x10: 0001ff880164e98f x9 : 0001ffc082aa3def [ 31.079824] x8 : 0001ffc082aa3dec x7 : 0000000000000000 x6 : 0000000000000516 [ 31.079982] x5 : ffffffc7f8d43000 x4 : ffffff88003c9c40 x3 : ffffffffffffffff [ 31.080147] x2 : ffffffc7f8d43000 x1 : 00000000000000c0 x0 : 0000000000000000 [ 31.080307] Call trace: [ 31.080340] xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.080518] xilinx_dpdma_issue_pending+0x11c/0x120 [ 31.080595] zynqmp_disp_layer_update+0x180/0x3ac [ 31.080712] zynqmp_dpsub_plane_atomic_update+0x11c/0x21c [ 31.080825] drm_atomic_helper_commit_planes+0x20c/0x684 [ 31.080951] drm_atomic_helper_commit_tail+0x5c/0xb0 [ 31.081139] commit_tail+0x234/0x294 [ 31.081246] drm_atomic_helper_commit+0x1f8/0x210 [ 31.081363] drm_atomic_commit+0x100/0x140 [ 31.081477] drm_client_modeset_commit_atomic+0x318/0x384 [ 31.081634] drm_client_modeset_commit_locked+0x8c/0x24c [ 31.081725] drm_client_modeset_commit+0x34/0x5c [ 31.081812] __drm_fb_helper_restore_fbdev_mode_unlocked+0x104/0x168 [ 31.081899] drm_fb_helper_set_par+0x50/0x70 [ 31.081971] fbcon_init+0x538/0xc48 [ 31.082047] visual_init+0x16c/0x23c [ 31.082207] do_bind_con_driver.isra.0+0x2d0/0x634 [ 31.082320] do_take_over_console+0x24c/0x33c [ 31.082429] do_fbcon_takeover+0xbc/0x1b0 [ 31.082503] fbcon_fb_registered+0x2d0/0x34c [ 31.082663] register_framebuffer+0x27c/0x38c [ 31.082767] __drm_fb_helper_initial_config_and_unlock+0x5c0/0x91c [ 31.082939] drm_fb_helper_initial_config+0x50/0x74 [ 31.083012] drm_fbdev_dma_client_hotplug+0xb8/0x108 [ 31.083115] drm_client_register+0xa0/0xf4 [ 31.083195] drm_fbdev_dma_setup+0xb0/0x1cc [ 31.083293] zynqmp_dpsub_drm_init+0x45c/0x4e0 [ 31.083431] zynqmp_dpsub_probe+0x444/0x5e0 [ 31.083616] platform_probe+0x8c/0x13c [ 31.083713] really_probe+0x258/0x59c [ 31.083793] __driver_probe_device+0xc4/0x224 [ 31.083878] driver_probe_device+0x70/0x1c0 [ 31.083961] __device_attach_driver+0x108/0x1e0 [ 31.084052] bus_for_each_drv+0x9c/0x100 [ 31.084125] __device_attach+0x100/0x298 [ 31.084207] device_initial_probe+0x14/0x20 [ 31.084292] bus_probe_device+0xd8/0xdc [ 31.084368] deferred_probe_work_func+0x11c/0x180 [ 31.084451] process_one_work+0x3ac/0x988 [ 31.084643] worker_thread+0x398/0x694 [ 31.084752] kthread+0x1bc/0x1c0 [ 31.084848] ret_from_fork+0x10/0x20 [ 31.084932] irq event stamp: 64549 [ 31.084970] hardirqs last enabled at (64548): [] _raw_spin_unlock_irqrestore+0x80/0x90 [ 31.085157] hardirqs last disabled at (64549): [] _raw_spin_lock_irqsave+0xc0/0xdc [ 31.085277] softirqs last enabled at (64503): [] __do_softirq+0x47c/0x500 [ 31.085390] softirqs last disabled at (64498): [] ____do_softirq+0x10/0x1c [ 31.085501] ---[ end trace 0000000000000000 ]--- Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver") Signed-off-by: Sean Anderson Reviewed-by: Tomi Valkeinen Link: https://lore.kernel.org/r/20240308210034.3634938-2-sean.anderson@linux.dev Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dpdma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 84dc5240a807..93938ed80fc8 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -214,7 +214,8 @@ struct xilinx_dpdma_tx_desc { * @running: true if the channel is running * @first_frame: flag for the first frame of stream * @video_group: flag if multi-channel operation is needed for video channels - * @lock: lock to access struct xilinx_dpdma_chan + * @lock: lock to access struct xilinx_dpdma_chan. Must be taken before + * @vchan.lock, if both are to be held. * @desc_pool: descriptor allocation pool * @err_task: error IRQ bottom half handler * @desc: References to descriptors being processed @@ -1097,12 +1098,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) * Complete the active descriptor, if any, promote the pending * descriptor to active, and queue the next transfer, if any. */ + spin_lock(&chan->vchan.lock); if (chan->desc.active) vchan_cookie_complete(&chan->desc.active->vdesc); chan->desc.active = pending; chan->desc.pending = NULL; xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); out: spin_unlock_irqrestore(&chan->lock, flags); @@ -1264,10 +1267,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan) struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; - spin_lock_irqsave(&chan->vchan.lock, flags); + spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); if (vchan_issue_pending(&chan->vchan)) xilinx_dpdma_chan_queue_transfer(chan); - spin_unlock_irqrestore(&chan->vchan.lock, flags); + spin_unlock(&chan->vchan.lock); + spin_unlock_irqrestore(&chan->lock, flags); } static int xilinx_dpdma_config(struct dma_chan *dchan, @@ -1495,7 +1500,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t) XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id); spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); spin_unlock_irqrestore(&chan->lock, flags); } From 023b6390a15a98f9c3aa5e7da78d485d5384a08e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 13 Mar 2024 14:40:31 -0700 Subject: [PATCH 534/553] dmaengine: idxd: Fix oops during rmmod on single-CPU platforms [ Upstream commit f221033f5c24659dc6ad7e5cf18fb1b075f4a8be ] During the removal of the idxd driver, registered offline callback is invoked as part of the clean up process. However, on systems with only one CPU online, no valid target is available to migrate the perf context, resulting in a kernel oops: BUG: unable to handle page fault for address: 000000000002a2b8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 1470e1067 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 20 Comm: cpuhp/0 Not tainted 6.8.0-rc6-dsa+ #57 Hardware name: Intel Corporation AvenueCity/AvenueCity, BIOS BHSDCRB1.86B.2492.D03.2307181620 07/18/2023 RIP: 0010:mutex_lock+0x2e/0x50 ... Call Trace: __die+0x24/0x70 page_fault_oops+0x82/0x160 do_user_addr_fault+0x65/0x6b0 __pfx___rdmsr_safe_on_cpu+0x10/0x10 exc_page_fault+0x7d/0x170 asm_exc_page_fault+0x26/0x30 mutex_lock+0x2e/0x50 mutex_lock+0x1e/0x50 perf_pmu_migrate_context+0x87/0x1f0 perf_event_cpu_offline+0x76/0x90 [idxd] cpuhp_invoke_callback+0xa2/0x4f0 __pfx_perf_event_cpu_offline+0x10/0x10 [idxd] cpuhp_thread_fun+0x98/0x150 smpboot_thread_fn+0x27/0x260 smpboot_thread_fn+0x1af/0x260 __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x103/0x140 __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fix the issue by preventing the migration of the perf context to an invalid target. Fixes: 81dd4d4d6178 ("dmaengine: idxd: Add IDXD performance monitor support") Reported-by: Terrence Xu Tested-by: Terrence Xu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20240313214031.1658045-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idxd/perfmon.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index d73004f47cf4..612ef13b7160 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -529,14 +529,11 @@ static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; target = cpumask_any_but(cpu_online_mask, cpu); - /* migrate events if there is a valid target */ - if (target < nr_cpu_ids) + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); - else - target = -1; - - perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + } return 0; } From 0b947c90e3aa8f381683984e4100a93971943c10 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 5 Dec 2023 11:02:55 +0800 Subject: [PATCH 535/553] riscv: fix VMALLOC_START definition [ Upstream commit ac88ff6b9d7dea9f0907c86bdae204dde7d5c0e6 ] When below config items are set, compiler complained: -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y ...... ----------------------- ------------------------------------------------------------------- arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:11:58: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'int' [-Wformat=] 11 | vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); | ~~^ | | | long unsigned int | %x ---------------------------------------------------------------------- This is because on riscv macro VMALLOC_START has different type when CONFIG_MMU is set or unset. arch/riscv/include/asm/pgtable.h: -------------------------------------------------- Changing it to _AC(0, UL) in case CONFIG_MMU=n can fix the warning. Link: https://lkml.kernel.org/r/ZW7OsX4zQRA3mO4+@MiWiFi-R3L-srv Signed-off-by: Baoquan He Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Signed-off-by: Andrew Morton Stable-dep-of: 6065e736f82c ("riscv: Fix TASK_SIZE on 64-bit NOMMU") Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 63055c6ad2c2..73fe12c93cad 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -800,7 +800,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ From 4201b8c8f2c32af321fb50867e68ac6c1cbed4be Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:46 -0800 Subject: [PATCH 536/553] riscv: Fix TASK_SIZE on 64-bit NOMMU [ Upstream commit 6065e736f82c817c9a597a31ee67f0ce4628e948 ] On NOMMU, userspace memory can come from anywhere in physical RAM. The current definition of TASK_SIZE is wrong if any RAM exists above 4G, causing spurious failures in the userspace access routines. Fixes: 6bd33e1ece52 ("riscv: add nommu support") Fixes: c3f896dcf1e4 ("mm: switch the test_vmalloc module to use __vmalloc_node") Signed-off-by: Samuel Holland Reviewed-by: Jisheng Zhang Reviewed-by: Bo Gan Link: https://lore.kernel.org/r/20240227003630.3634533-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 73fe12c93cad..2d9416a6a070 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -799,7 +799,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL -#define TASK_SIZE 0xffffffffUL +#define TASK_SIZE _AC(-1, UL) #define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE From 25b3498485ac281e5851700e33b97f12c9533fd8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 6 Apr 2024 16:08:21 +0200 Subject: [PATCH 537/553] phy: ti: tusb1210: Resolve charger-det crash if charger psy is unregistered [ Upstream commit bf6e4ee5c43690e4c5a8a057bbcd4ff986bed052 ] The power_supply frame-work is not really designed for there to be long living in kernel references to power_supply devices. Specifically unregistering a power_supply while some other code has a reference to it triggers a WARN in power_supply_unregister(): WARN_ON(atomic_dec_return(&psy->use_cnt)); Folllowed by the power_supply still getting removed and the backing data freed anyway, leaving the tusb1210 charger-detect code with a dangling reference, resulting in a crash the next time tusb1210_get_online() is called. Fix this by only holding the reference in tusb1210_get_online() freeing it at the end of the function. Note this still leaves a theoretical race window, but it avoids the issue when manually rmmod-ing the charger chip driver during development. Fixes: 48969a5623ed ("phy: ti: tusb1210: Add charger detection") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240406140821.18624-1-hdegoede@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/ti/phy-tusb1210.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index 669c13d6e402..bdd44ec3e809 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -64,7 +64,6 @@ struct tusb1210 { struct delayed_work chg_det_work; struct notifier_block psy_nb; struct power_supply *psy; - struct power_supply *charger; #endif }; @@ -230,19 +229,24 @@ static const char * const tusb1210_chargers[] = { static bool tusb1210_get_online(struct tusb1210 *tusb) { + struct power_supply *charger = NULL; union power_supply_propval val; - int i; + bool online = false; + int i, ret; - for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !tusb->charger; i++) - tusb->charger = power_supply_get_by_name(tusb1210_chargers[i]); + for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !charger; i++) + charger = power_supply_get_by_name(tusb1210_chargers[i]); - if (!tusb->charger) + if (!charger) return false; - if (power_supply_get_property(tusb->charger, POWER_SUPPLY_PROP_ONLINE, &val)) - return false; + ret = power_supply_get_property(charger, POWER_SUPPLY_PROP_ONLINE, &val); + if (ret == 0) + online = val.intval; - return val.intval; + power_supply_put(charger); + + return online; } static void tusb1210_chg_det_work(struct work_struct *work) @@ -466,9 +470,6 @@ static void tusb1210_remove_charger_detect(struct tusb1210 *tusb) cancel_delayed_work_sync(&tusb->chg_det_work); power_supply_unregister(tusb->psy); } - - if (tusb->charger) - power_supply_put(tusb->charger); } #else static void tusb1210_probe_charger_detect(struct tusb1210 *tusb) { } From 4e75e222d397c6752b229ed72fc4644c8c36ecde Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 26 Apr 2024 08:44:08 +0200 Subject: [PATCH 538/553] i2c: smbus: fix NULL function pointer dereference [ Upstream commit 91811a31b68d3765b3065f4bb6d7d6d84a7cfc9f ] Baruch reported an OOPS when using the designware controller as target only. Target-only modes break the assumption of one transfer function always being available. Fix this by always checking the pointer in __i2c_transfer. Reported-by: Baruch Siach Closes: https://lore.kernel.org/r/4269631780e5ba789cf1ae391eec1b959def7d99.1712761976.git.baruch@tkos.co.il Fixes: 4b1acc43331d ("i2c: core changes for slave support") [wsa: dropped the simplification in core-smbus to avoid theoretical regressions] Signed-off-by: Wolfram Sang Tested-by: Baruch Siach Signed-off-by: Sasha Levin --- drivers/i2c/i2c-core-base.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5e3976ba5265..1ebc95379914 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2075,13 +2075,18 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, * Returns negative errno, else the number of messages executed. * * Adapter lock must be held when calling this function. No debug logging - * takes place. adap->algo->master_xfer existence isn't checked. + * takes place. */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { unsigned long orig_jiffies; int ret, try; + if (!adap->algo->master_xfer) { + dev_dbg(&adap->dev, "I2C level transfers not supported\n"); + return -EOPNOTSUPP; + } + if (WARN_ON(!msgs || num < 1)) return -EINVAL; @@ -2148,11 +2153,6 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { int ret; - if (!adap->algo->master_xfer) { - dev_dbg(&adap->dev, "I2C level transfers not supported\n"); - return -EOPNOTSUPP; - } - /* REVISIT the fault reporting model here is weak: * * - When we get an error after receiving N bytes from a slave, From 9b7c5004d7c5ae062134052a85290869a015814c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 29 Apr 2024 15:47:51 +0100 Subject: [PATCH 539/553] bounds: Use the right number of bits for power-of-two CONFIG_NR_CPUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5af385f5f4cddf908f663974847a4083b2ff2c79 upstream. bits_per() rounds up to the next power of two when passed a power of two. This causes crashes on some machines and configurations. Reported-by: Михаил Новоселов Tested-by: Ильфат Гаптрахманов Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3347 Link: https://lore.kernel.org/all/1c978cf1-2934-4e66-e4b3-e81b04cb3571@rosalinux.ru/ Fixes: f2d5dcb48f7b (bounds: support non-power-of-two CONFIG_NR_CPUS) Cc: Signed-off-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Mel Gorman Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index c5a9fcd2d622..29b2cd00df2c 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, order_base_2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN From 6536f12fe2ddaa134b9ffa47b115256bd1302a6e Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 29 Apr 2024 17:44:21 -0700 Subject: [PATCH 540/553] macsec: Enable devices to advertise whether they update sk_buff md_dst during offloads commit 475747a19316b08e856c666a20503e73d7ed67ed upstream. Omit rx_use_md_dst comment in upstream commit since macsec_ops is not documented. Cannot know whether a Rx skb missing md_dst is intended for MACsec or not without knowing whether the device is able to update this field during an offload. Assume that an offload to a MACsec device cannot support updating md_dst by default. Capable devices can advertise that they do indicate that an skb is related to a MACsec offloaded packet using the md_dst. Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/macsec.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/macsec.h b/include/net/macsec.h index 65c93959c2dc..dd578d193f9a 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -302,6 +302,7 @@ struct macsec_ops { int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx); + bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); From 21e042d29e8990e30a10b2e087fc06cdc9597a35 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 29 Apr 2024 17:44:23 -0700 Subject: [PATCH 541/553] macsec: Detect if Rx skb is macsec-related for offloading devices that update md_dst commit 642c984dd0e37dbaec9f87bd1211e5fac1f142bf upstream. Can now correctly identify where the packets should be delivered by using md_dst or its absence on devices that provide it. This detection is not possible without device drivers that update md_dst. A fallback pattern should be used for supporting such device drivers. This fallback mode causes multicast messages to be cloned to both the non-macsec and macsec ports, independent of whether the multicast message received was encrypted over MACsec or not. Other non-macsec traffic may also fail to be handled correctly for devices in promiscuous mode. Link: https://lore.kernel.org/netdev/ZULRxX9eIbFiVi7v@hog/ Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-4-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 209ee9f35275..8a8fd74110e2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1007,10 +1007,12 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; + bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); + is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; @@ -1021,10 +1023,42 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { - if (md_dst && md_dst->type == METADATA_MACSEC && - (!find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci))) + const struct macsec_ops *ops; + + ops = macsec_get_ops(macsec, NULL); + + if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; + if (is_macsec_md_dst) { + struct macsec_rx_sc *rx_sc; + + /* All drivers that implement MACsec offload + * support using skb metadata destinations must + * indicate that they do so. + */ + DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); + rx_sc = find_rx_sc(&macsec->secy, + md_dst->u.macsec_info.sci); + if (!rx_sc) + continue; + /* device indicated macsec offload occurred */ + skb->dev = ndev; + skb->pkt_type = PACKET_HOST; + eth_skb_pkt_type(skb, ndev); + ret = RX_HANDLER_ANOTHER; + goto out; + } + + /* This datapath is insecure because it is unable to + * enforce isolation of broadcast/multicast traffic and + * unicast traffic with promiscuous mode on the macsec + * netdev. Since the core stack has no mechanism to + * check that the hardware did indeed receive MACsec + * traffic, it is possible that the response handling + * done by the MACsec port was to a plaintext packet. + * This violates the MACsec protocol standard. + */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ @@ -1040,11 +1074,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) break; nskb->dev = ndev; - if (ether_addr_equal_64bits(hdr->h_dest, - ndev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; - else - nskb->pkt_type = PACKET_MULTICAST; + eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); } From ca817d44befcfdff4aab0bbc5a9c8a0c1eea6983 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 29 Apr 2024 17:44:24 -0700 Subject: [PATCH 542/553] net/mlx5e: Advertise mlx5 ethernet driver updates sk_buff md_dst for MACsec commit 39d26a8f2efcb8b5665fe7d54a7dba306a8f1dff upstream. mlx5 Rx flow steering and CQE handling enable the driver to be able to update an skb's md_dst attribute as MACsec when MACsec traffic arrives when a device is configured for offloading. Advertise this to the core stack to take advantage of this capability. Cc: stable@vger.kernel.org Fixes: b7c9400cbc48 ("net/mlx5e: Implement MACsec Rx data path using MACsec skb_metadata_dst") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-5-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index a7832a0180ee..48cf691842b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1703,6 +1703,7 @@ static const struct macsec_ops macsec_offload_ops = { .mdo_add_secy = mlx5e_macsec_add_secy, .mdo_upd_secy = mlx5e_macsec_upd_secy, .mdo_del_secy = mlx5e_macsec_del_secy, + .rx_uses_md_dst = true, }; bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) From 909ba1f1b4146de529469910c1bd0b1248964536 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2024 16:29:32 +0200 Subject: [PATCH 543/553] Linux 6.1.90 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240430103047.561802595@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: SeongJae Park Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Miguel Ojeda Tested-by: Jon Hunter Tested-by: Mateusz Jończyk Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Pascal Ernster Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a0472e1cf715..7ae5cf9ec9e5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 89 +SUBLEVEL = 90 EXTRAVERSION = NAME = Curry Ramen From 0cf5cecba66e91b69268fd87ded20e755bee1938 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Thu, 30 May 2024 13:00:52 -0700 Subject: [PATCH 544/553] Revert "crypto: api - Disallow identical driver names" This reverts commit 680eb0a99336f7b21ff149bc57579d059421c5de which is commit 27016f75f5ed47e2d8e0ca75a8ff1f40bc1a5e27 upstream. This breaks for devices that use the downstream fips140 module. Bug: 335718233 Bug: 335830134 Change-Id: Ie465403c40fe75fee5934ea160b86a4c77ef8f17 Signed-off-by: Will McVicker --- crypto/algapi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 05a39b80aabe..60b98d2c400e 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -329,7 +329,6 @@ __crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put) } if (!strcmp(q->cra_driver_name, alg->cra_name) || - !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } From 416b90bf47dc6988b628abe55fb27668260e4742 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 Jun 2024 09:51:34 +0000 Subject: [PATCH 545/553] Revert "vsock/virtio: fix packet delivery to tap device" This reverts commit db388b8e12aa7c3660617cc5e8beb90f71bba206 which is commit b32a09ea7c38849ff925489a6bf5bd8914bc45df upstream. It breaks the build due to previous reverts of ABI breaking changes in the virtio_transport.c driver. This is safe to revert here as this driver is not used in GKI kernels, only in test builds. Bug: 161946584 Change-Id: Iad31e3a4d4a7b88469cbfa2aa2635bcf61295bd4 Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 10528081e800..460e7fbb42da 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -139,8 +139,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) break; } - virtio_transport_deliver_tap_pkt(skb); - if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; From da08c217d80d6a6b1f23354a08a75f70f89c3976 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 May 2024 14:12:38 +0000 Subject: [PATCH 546/553] ANDROID: preserve CRC values in struct sk_buff due to ip_defrag_offset removal In commit 7d0567842b78 ("inet: inet_defrag: prevent sk release while still in use"), the ip_defrag_offset variable was removed from the union in struct sk_buff and moved to an internal-to-the-network-stack location in order to resolve an issue. This does not change the structure size, but the ABI tracking logic doesn't appreciate the variable going away, so add the union back. No functional changes here as no external code could ever use the ip_defrag_offset variable as seen in the commit that changed this. Bug: 161946584 Fixes: 7d0567842b78 ("inet: inet_defrag: prevent sk release while still in use") Change-Id: Id09f9398f41d9dbe9742807495d260e1f7a7a516 Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a9e8875e3394..bcf948920812 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -872,7 +872,15 @@ struct sk_buff { struct llist_node ll_node; }; - struct sock *sk; + /* ANDROID: + * Put back the union removed in commit 7d0567842b78 ("inet: + * inet_defrag: prevent sk release while still in use") to preserve the + * crcs of stuff. Does not affect any code functionality. + */ + union { + struct sock *sk; + int ip_defrag_offset; + }; union { ktime_t tstamp; From a28d27f66e99a39df2573c78fa1e295646fd3606 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Jun 2024 11:27:40 +0000 Subject: [PATCH 547/553] ANDROID: fix crc issue in net/ipv4/inet_fragment.c In commit 7d0567842b78 ("inet: inet_defrag: prevent sk release while still in use") a new include file is added, which messes up the crc generation of some of the exported symbols in net/ipv4/inet_fragment.c. Fix this up by only including it when NOT generating the crc values. Bug: 161946584 Fixes: 7d0567842b78 ("inet: inet_defrag: prevent sk release while still in use") Change-Id: Ifef668d676faed4c9b96a481807b7d000e5c39db Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 834cdc57755f..63968d081d96 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,7 +24,9 @@ #include #include +#ifndef __GENKSYMS__ #include "../core/sock_destructor.h" +#endif /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will From 37db5a6cac9c4944aefd0484469a09d64b35a89f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 15 May 2024 13:59:41 +0000 Subject: [PATCH 548/553] Reapply "scsi: core: Add struct for args to execution functions" This reverts commit 345b6b831980964b607db53cfd681abd2234a1b7 which is commit d0949565811f0896c1c7e781ab2ad99d34273fdf upstream. Reverts of a revert, fun Android API stuff... It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I9f3f79916a434ecc00c5be57c56c2731760b4f2d Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 52 ++++++++++++++++++-------------------- include/scsi/scsi_device.h | 32 +++++++++++++++++++++++ 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 55ad2bf330a8..f0b26b03c20e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } - /** - * __scsi_execute - insert request and wait for the result - * @sdev: scsi device + * scsi_execute_cmd - insert request and wait for the result + * @sdev: scsi_device * @cmd: scsi command - * @data_direction: data direction + * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer - * @sense: optional sense buffer - * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @flags: flags for ->cmd_flags - * @rq_flags: flags for ->rq_flags - * @resid: optional residual length + * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid) +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args) { + static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - req = scsi_alloc_request(sdev->request_queue, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); + if (!args) + args = &default_args; + else if (WARN_ON_ONCE(args->sense && + args->sense_len != SCSI_SENSE_BUFFERSIZE)) + return -EINVAL; + + req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -233,8 +231,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, scmd->allowed = retries; scmd->flags |= args->scmd_flags; req->timeout = timeout; - req->cmd_flags |= flags; - req->rq_flags |= rq_flags | RQF_QUIET; + req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -250,20 +247,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (resid) - *resid = scmd->resid_len; - if (sense && scmd->sense_len) - memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (sshdr) + if (args->resid) + *args->resid = scmd->resid_len; + if (args->sense) + memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - sshdr); + args->sshdr); + ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(__scsi_execute); +EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 7d894e474a07..58348f9e2bec 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -476,6 +476,38 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, int timeout, int retries, const struct scsi_exec_args *args); +/* Make sure any sense buffer is the correct size. */ +#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ + _sshdr, _timeout, _retries, _flags, _rq_flags, \ + _resid) \ +({ \ + scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ + REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ + _buffer, _bufflen, _timeout, _retries, \ + &(struct scsi_exec_args) { \ + .sense = _sense, \ + .sshdr = _sshdr, \ + .req_flags = _rq_flags & RQF_PM ? \ + BLK_MQ_REQ_PM : 0, \ + .resid = _resid, \ + }); \ +}) + +static inline int scsi_execute_req(struct scsi_device *sdev, + const unsigned char *cmd, int data_direction, void *buffer, + unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, + int retries, int *resid) +{ + return scsi_execute_cmd(sdev, cmd, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, + bufflen, timeout, retries, + &(struct scsi_exec_args) { + .sshdr = sshdr, + .resid = resid, + }); +} + extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); extern int scsi_vpd_lun_id(struct scsi_device *, char *, size_t); From 077eb0a09d73775c4b971b9c245446fed51393d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 20 Jun 2024 16:20:08 +0000 Subject: [PATCH 549/553] Revert "usb: xhci: Add timeout argument in address_device USB HCD callback" This reverts commit 5f9b63193bcac6938298ec8f079e533da6db5e86 which is commit a769154c7cac037914ba375ae88aae55b2c853e0 upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I77f823bdc857c0eb1a1f500b469f4678597d1484 Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 2 +- drivers/usb/host/xhci-mem.c | 2 -- drivers/usb/host/xhci-ring.c | 11 +++++------ drivers/usb/host/xhci.c | 23 +++++++---------------- drivers/usb/host/xhci.h | 10 ++-------- include/linux/usb/hcd.h | 5 ++--- 6 files changed, 17 insertions(+), 36 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 8895f2301876..8e03ccb23ba1 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4739,7 +4739,7 @@ static int hub_set_address(struct usb_device *udev, int devnum) if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) - retval = hcd->driver->address_device(hcd, udev, timeout_ms); + retval = hcd->driver->address_device(hcd, udev); else retval = usb_control_msg(udev, usb_sndaddr0pipe(), USB_REQ_SET_ADDRESS, 0, devnum, 0, diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 9be58e6dd290..060b83a2ce11 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1832,8 +1832,6 @@ struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci, } command->status = 0; - /* set default timeout to 5000 ms */ - command->timeout_ms = XHCI_CMD_DEFAULT_TIMEOUT; INIT_LIST_HEAD(&command->cmd_list); return command; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index c11c1b1185ef..bfee7c695d86 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -334,10 +334,9 @@ void xhci_ring_cmd_db(struct xhci_hcd *xhci) } EXPORT_SYMBOL_GPL(xhci_ring_cmd_db); -static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci) +static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) { - return mod_delayed_work(system_wq, &xhci->cmd_timer, - msecs_to_jiffies(xhci->current_cmd->timeout_ms)); + return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); } static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci) @@ -381,7 +380,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && !(xhci->xhc_state & XHCI_STATE_DYING)) { xhci->current_cmd = cur_cmd; - xhci_mod_cmd_timer(xhci); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); xhci_ring_cmd_db(xhci); } } @@ -1765,7 +1764,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, if (!list_is_singular(&xhci->cmd_list)) { xhci->current_cmd = list_first_entry(&cmd->cmd_list, struct xhci_command, cmd_list); - xhci_mod_cmd_timer(xhci); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); } else if (xhci->current_cmd == cmd) { xhci->current_cmd = NULL; } @@ -4342,7 +4341,7 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, /* if there are no other commands queued we start the timeout timer */ if (list_empty(&xhci->cmd_list)) { xhci->current_cmd = cmd; - xhci_mod_cmd_timer(xhci); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); } list_add_tail(&cmd->cmd_list, &xhci->cmd_list); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index b835af8dcbc2..3b9c8b71ff1a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4224,18 +4224,12 @@ disable_slot: return 0; } -/** - * xhci_setup_device - issues an Address Device command to assign a unique - * USB bus address. - * @hcd: USB host controller data structure. - * @udev: USB dev structure representing the connected device. - * @setup: Enum specifying setup mode: address only or with context. - * @timeout_ms: Max wait time (ms) for the command operation to complete. - * - * Return: 0 if successful; otherwise, negative error code. +/* + * Issue an Address Device command and optionally send a corresponding + * SetAddress request to the device. */ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, - enum xhci_setup_dev setup, unsigned int timeout_ms) + enum xhci_setup_dev setup) { const char *act = setup == SETUP_CONTEXT_ONLY ? "context" : "address"; unsigned long flags; @@ -4292,7 +4286,6 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, } command->in_ctx = virt_dev->in_ctx; - command->timeout_ms = timeout_ms; slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->in_ctx); ctrl_ctx = xhci_get_input_control_ctx(virt_dev->in_ctx); @@ -4426,17 +4419,15 @@ out: return ret; } -int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev, - unsigned int timeout_ms) +int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) { - return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS, timeout_ms); + return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS); } EXPORT_SYMBOL_GPL(xhci_address_device); static int xhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) { - return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY, - XHCI_CMD_DEFAULT_TIMEOUT); + return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY); } /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 67d9b22e8d84..91754f5e786e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -817,9 +817,6 @@ struct xhci_command { union xhci_trb *command_trb; struct list_head cmd_list; - /* xHCI command response timeout in milliseconds */ - unsigned int timeout_ms; - ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); }; @@ -1583,11 +1580,8 @@ struct xhci_td { unsigned int num_trbs; }; -/* - * xHCI command default timeout value in milliseconds. - * USB 3.2 spec, section 9.2.6.1 - */ -#define XHCI_CMD_DEFAULT_TIMEOUT 5000 +/* xHCI command default timeout value */ +#define XHCI_CMD_DEFAULT_TIMEOUT (5 * HZ) /* command descriptor */ struct xhci_cd { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index bc919c56262a..3269c0e733ac 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -377,9 +377,8 @@ struct hc_driver { * or bandwidth constraints. */ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); - /* Set the hardware-chosen device address */ - int (*address_device)(struct usb_hcd *, struct usb_device *udev, - unsigned int timeout_ms); + /* Returns the hardware-chosen device address */ + int (*address_device)(struct usb_hcd *, struct usb_device *udev); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. From 3b75c4ca77290990c3fa4f2b630961a006fc5f36 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Jun 2024 07:00:33 +0000 Subject: [PATCH 550/553] ANDROID: update .stg for change to struct clk_core In commit a424e713e0cc ("clk: Get runtime PM before walking tree during disable_unused"), the internal struct clk_core structure gets a new field. That disturbs the abi checker, as many apis use a pointer to struct clk_core. But because all callers treat this as an opaque structure, and it is internal to the kernel core only, it is safe to update the definition of it as there is no real "abi break" here at all. Bug: 161946584 Fixes: a424e713e0cc ("clk: Get runtime PM before walking tree during disable_unused") Change-Id: Ic57abb142c040b62a006c5391f4c041c6e293c34 Signed-off-by: Greg Kroah-Hartman --- android/abi_gki_aarch64.stg | 243 ++++++++++++++++++------------------ 1 file changed, 122 insertions(+), 121 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 4413ebbb7289..4857e59849ff 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -43665,10 +43665,10 @@ member { offset: 20352 } member { - id: 0x6b47fdc8 + id: 0x6b47f58f name: "accuracy" type_id: 0x33756485 - offset: 1216 + offset: 1344 } member { id: 0x6bbd8f78 @@ -55603,10 +55603,10 @@ member { offset: 128 } member { - id: 0x1068dd68 + id: 0x1068d085 name: "boot_enabled" type_id: 0x6d7f5ff6 - offset: 984 + offset: 1112 } member { id: 0xc12e10df @@ -61739,10 +61739,10 @@ member { offset: 4608 } member { - id: 0x19578fc7 + id: 0x1957812f name: "child_node" type_id: 0x49a73111 - offset: 1472 + offset: 1600 } member { id: 0x19cde9e5 @@ -61841,10 +61841,10 @@ member { offset: 64 } member { - id: 0x25f4197a + id: 0x25f4116e name: "children" type_id: 0x5e8dc7f4 - offset: 1408 + offset: 1536 } member { id: 0x7d657f67 @@ -63161,10 +63161,10 @@ member { offset: 2816 } member { - id: 0x08f4a5ea + id: 0x08f4ab77 name: "clks" type_id: 0x5e8dc7f4 - offset: 1600 + offset: 1728 } member { id: 0x5f036ab2 @@ -72960,10 +72960,10 @@ member { offset: 62464 } member { - id: 0xe0038b0c + id: 0xe0038ca1 name: "debug_node" type_id: 0x49a73111 - offset: 1792 + offset: 1920 } member { id: 0x2c8c53b8 @@ -74282,18 +74282,18 @@ member { type_id: 0x120540d1 offset: 34368 } +member { + id: 0xf3efe7b3 + name: "dentry" + type_id: 0x120540d1 + offset: 1856 +} member { id: 0xf3efe84f name: "dentry" type_id: 0x120540d1 offset: 64 } -member { - id: 0xf3efeb8b - name: "dentry" - type_id: 0x120540d1 - offset: 1728 -} member { id: 0xf3efebee name: "dentry" @@ -82007,10 +82007,10 @@ member { offset: 64 } member { - id: 0xd5562c37 + id: 0xd5562760 name: "duty" type_id: 0x7747934c - offset: 1312 + offset: 1440 } member { id: 0xc1236c49 @@ -83410,10 +83410,10 @@ member { offset: 256 } member { - id: 0x7ad6105e + id: 0x7ad61d96 name: "enable_count" type_id: 0x4585663f - offset: 992 + offset: 1120 } member { id: 0xd6605ae1 @@ -91915,12 +91915,6 @@ member { type_id: 0x33756485 offset: 3264 } -member { - id: 0x2d5bfb9c - name: "flags" - type_id: 0x33756485 - offset: 896 -} member { id: 0x2d5bfbb9 name: "flags" @@ -125527,18 +125521,18 @@ member { type_id: 0xc93e017b offset: 912 } -member { - id: 0x5c4b3279 - name: "max_rate" - type_id: 0x33756485 - offset: 1152 -} member { id: 0x5c4b3567 name: "max_rate" type_id: 0x33756485 offset: 128 } +member { + id: 0x5c4b3ad8 + name: "max_rate" + type_id: 0x33756485 + offset: 1280 +} member { id: 0x5c4b3b62 name: "max_rate" @@ -128726,18 +128720,18 @@ member { type_id: 0x92233392 offset: 256 } -member { - id: 0x78e29322 - name: "min_rate" - type_id: 0x33756485 - offset: 1088 -} member { id: 0x78e29ab5 name: "min_rate" type_id: 0x33756485 offset: 64 } +member { + id: 0x78e29c90 + name: "min_rate" + type_id: 0x33756485 + offset: 1216 +} member { id: 0x78e29eff name: "min_rate" @@ -134627,10 +134621,10 @@ member { bitsize: 1 } member { - id: 0xfbc6aa01 + id: 0xfbc6a07e name: "need_sync" type_id: 0x6d7f5ff6 - offset: 976 + offset: 1104 } member { id: 0xfbc6aa7e @@ -135096,10 +135090,10 @@ member { offset: 1568 } member { - id: 0x4c568493 + id: 0x4c568fda name: "new_child" type_id: 0x16b3acfc - offset: 832 + offset: 960 } member { id: 0x6fa9956d @@ -135138,16 +135132,16 @@ member { offset: 2432 } member { - id: 0x7c11dfa4 + id: 0x7c11ddac name: "new_parent" type_id: 0x16b3acfc - offset: 768 + offset: 896 } member { - id: 0xde66b021 + id: 0xde66bd46 name: "new_parent_index" type_id: 0x295c7202 - offset: 520 + offset: 648 } member { id: 0x47a34ff8 @@ -135163,10 +135157,10 @@ member { offset: 32 } member { - id: 0x4c8e95ba + id: 0x4c8e943d name: "new_rate" type_id: 0x33756485 - offset: 704 + offset: 832 } member { id: 0x0a536b98 @@ -137693,10 +137687,10 @@ member { offset: 192 } member { - id: 0xbbe14bb7 + id: 0xbbe14ee8 name: "notifier_count" type_id: 0x4585663f - offset: 1664 + offset: 1792 } member { id: 0xdec375c9 @@ -140517,18 +140511,18 @@ member { type_id: 0x6720d32f offset: 1088 } +member { + id: 0x5f13c0bd + name: "num_parents" + type_id: 0x295c7202 + offset: 640 +} member { id: 0x5f13c103 name: "num_parents" type_id: 0x295c7202 offset: 320 } -member { - id: 0x5f13cddc - name: "num_parents" - type_id: 0x295c7202 - offset: 512 -} member { id: 0x5f7f1537 name: "num_parents" @@ -144617,18 +144611,18 @@ member { type_id: 0x92233392 offset: 2176 } +member { + id: 0x6ba6999e + name: "orphan" + type_id: 0x6d7f5ff6 + offset: 1088 +} member { id: 0x6ba699d3 name: "orphan" type_id: 0x6d7f5ff6 offset: 32 } -member { - id: 0x6ba69c1b - name: "orphan" - type_id: 0x6d7f5ff6 - offset: 960 -} member { id: 0x304e36cb name: "orphan_count" @@ -147159,10 +147153,10 @@ member { offset: 768 } member { - id: 0x7227c0c4 + id: 0x7227ca4e name: "parent" type_id: 0x16b3acfc - offset: 384 + offset: 512 } member { id: 0x72281112 @@ -147447,10 +147441,10 @@ member { offset: 1024 } member { - id: 0x0a0fa3a3 + id: 0x0a0fada6 name: "parents" type_id: 0x27b8a069 - offset: 448 + offset: 576 } member { id: 0x0a33e4ac @@ -150048,10 +150042,10 @@ member { offset: 44992 } member { - id: 0x39ef5b6d + id: 0x39ef5fe3 name: "phase" type_id: 0x6720d32f - offset: 1280 + offset: 1408 } member { id: 0xad142fe3 @@ -154613,10 +154607,10 @@ member { offset: 64 } member { - id: 0x088ec4c0 + id: 0x088ecd03 name: "prepare_count" type_id: 0x4585663f - offset: 1024 + offset: 1152 } member { id: 0x90c1fac3 @@ -157453,10 +157447,10 @@ member { offset: 1608 } member { - id: 0x6fd5241a + id: 0x6fd52f10 name: "protect_count" type_id: 0x4585663f - offset: 1056 + offset: 1184 } member { id: 0xbe992d26 @@ -160280,17 +160274,17 @@ member { type_id: 0x0baa70a7 offset: 448 } +member { + id: 0x5fe125a7 + name: "rate" + type_id: 0x33756485 + offset: 704 +} member { id: 0x5fe1279a name: "rate" type_id: 0x33756485 } -member { - id: 0x5fe12ee8 - name: "rate" - type_id: 0x33756485 - offset: 576 -} member { id: 0x5fe12f53 name: "rate" @@ -163122,17 +163116,17 @@ member { type_id: 0x6f1daf87 offset: 832 } -member { - id: 0xce703a83 - name: "ref" - type_id: 0x6f1daf87 - offset: 1920 -} member { id: 0xce703b5d name: "ref" type_id: 0x6f1daf87 } +member { + id: 0xce703cdf + name: "ref" + type_id: 0x6f1daf87 + offset: 2048 +} member { id: 0xce703d24 name: "ref" @@ -166184,10 +166178,10 @@ member { offset: 5120 } member { - id: 0x9c3f558c + id: 0x9c3f5b5b name: "req_rate" type_id: 0x33756485 - offset: 640 + offset: 768 } member { id: 0xfa136415 @@ -170047,10 +170041,10 @@ member { offset: 34816 } member { - id: 0x1070b82a + id: 0x1070ba03 name: "rpm_enabled" type_id: 0x6d7f5ff6 - offset: 968 + offset: 1096 } member { id: 0x175e0853 @@ -170058,6 +170052,12 @@ member { type_id: 0xa69e469b offset: 704 } +member { + id: 0x6796571d + name: "rpm_node" + type_id: 0x49a73111 + offset: 320 +} member { id: 0x82287aae name: "rpm_status" @@ -224659,42 +224659,43 @@ struct_union { kind: STRUCT name: "clk_core" definition { - bytesize: 248 + bytesize: 264 member_id: 0x0de57ce8 member_id: 0xafb61a8a member_id: 0x97cde891 member_id: 0x4a965a99 member_id: 0xce3bba18 - member_id: 0xf7c3f586 - member_id: 0x7227c0c4 - member_id: 0x0a0fa3a3 - member_id: 0x5f13cddc - member_id: 0xde66b021 - member_id: 0x5fe12ee8 - member_id: 0x9c3f558c - member_id: 0x4c8e95ba - member_id: 0x7c11dfa4 - member_id: 0x4c568493 - member_id: 0x2d5bfb9c - member_id: 0x6ba69c1b - member_id: 0x1070b82a - member_id: 0xfbc6aa01 - member_id: 0x1068dd68 - member_id: 0x7ad6105e - member_id: 0x088ec4c0 - member_id: 0x6fd5241a - member_id: 0x78e29322 - member_id: 0x5c4b3279 - member_id: 0x6b47fdc8 - member_id: 0x39ef5b6d - member_id: 0xd5562c37 - member_id: 0x25f4197a - member_id: 0x19578fc7 - member_id: 0x08f4a5ea - member_id: 0xbbe14bb7 - member_id: 0xf3efeb8b - member_id: 0xe0038b0c - member_id: 0xce703a83 + member_id: 0x6796571d + member_id: 0xf7c3f536 + member_id: 0x7227ca4e + member_id: 0x0a0fada6 + member_id: 0x5f13c0bd + member_id: 0xde66bd46 + member_id: 0x5fe125a7 + member_id: 0x9c3f5b5b + member_id: 0x4c8e943d + member_id: 0x7c11ddac + member_id: 0x4c568fda + member_id: 0x2d5bf625 + member_id: 0x6ba6999e + member_id: 0x1070ba03 + member_id: 0xfbc6a07e + member_id: 0x1068d085 + member_id: 0x7ad61d96 + member_id: 0x088ecd03 + member_id: 0x6fd52f10 + member_id: 0x78e29c90 + member_id: 0x5c4b3ad8 + member_id: 0x6b47f58f + member_id: 0x39ef5fe3 + member_id: 0xd5562760 + member_id: 0x25f4116e + member_id: 0x1957812f + member_id: 0x08f4ab77 + member_id: 0xbbe14ee8 + member_id: 0xf3efe7b3 + member_id: 0xe0038ca1 + member_id: 0xce703cdf } } struct_union { From f17db53dd96184f19a8e017a534a4466f6c3efa9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Jun 2024 12:26:13 +0000 Subject: [PATCH 551/553] Revert "net/mlx5e: Advertise mlx5 ethernet driver updates sk_buff md_dst for MACsec" This reverts commit ca817d44befcfdff4aab0bbc5a9c8a0c1eea6983 which is commit 39d26a8f2efcb8b5665fe7d54a7dba306a8f1dff upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: If034cf13737b592b71e4418657c631ea0e34a6c9 Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index e82ee1977486..0f8f3ce35537 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1703,7 +1703,6 @@ static const struct macsec_ops macsec_offload_ops = { .mdo_add_secy = mlx5e_macsec_add_secy, .mdo_upd_secy = mlx5e_macsec_upd_secy, .mdo_del_secy = mlx5e_macsec_del_secy, - .rx_uses_md_dst = true, }; bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) From 35df421fc41b3f6780320b7369ebbe3b8c546523 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Jun 2024 12:26:33 +0000 Subject: [PATCH 552/553] Revert "macsec: Detect if Rx skb is macsec-related for offloading devices that update md_dst" This reverts commit 21e042d29e8990e30a10b2e087fc06cdc9597a35 which is commit 642c984dd0e37dbaec9f87bd1211e5fac1f142bf upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I950792c74cae726b11779d62164947cfafd9c5d7 Signed-off-by: Greg Kroah-Hartman --- drivers/net/macsec.c | 44 +++++++------------------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index fc8a8c09a56b..42bd1bcaa3b5 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1007,12 +1007,10 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; - bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); - is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; @@ -1023,42 +1021,10 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { - const struct macsec_ops *ops; - - ops = macsec_get_ops(macsec, NULL); - - if (ops->rx_uses_md_dst && !is_macsec_md_dst) + if (md_dst && md_dst->type == METADATA_MACSEC && + (!find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci))) continue; - if (is_macsec_md_dst) { - struct macsec_rx_sc *rx_sc; - - /* All drivers that implement MACsec offload - * support using skb metadata destinations must - * indicate that they do so. - */ - DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); - rx_sc = find_rx_sc(&macsec->secy, - md_dst->u.macsec_info.sci); - if (!rx_sc) - continue; - /* device indicated macsec offload occurred */ - skb->dev = ndev; - skb->pkt_type = PACKET_HOST; - eth_skb_pkt_type(skb, ndev); - ret = RX_HANDLER_ANOTHER; - goto out; - } - - /* This datapath is insecure because it is unable to - * enforce isolation of broadcast/multicast traffic and - * unicast traffic with promiscuous mode on the macsec - * netdev. Since the core stack has no mechanism to - * check that the hardware did indeed receive MACsec - * traffic, it is possible that the response handling - * done by the MACsec port was to a plaintext packet. - * This violates the MACsec protocol standard. - */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ @@ -1074,7 +1040,11 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) break; nskb->dev = ndev; - eth_skb_pkt_type(nskb, ndev); + if (ether_addr_equal_64bits(hdr->h_dest, + ndev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; __netif_rx(nskb); } From c034535679d056565875fec24925fd21309040c5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Jun 2024 12:26:47 +0000 Subject: [PATCH 553/553] Revert "macsec: Enable devices to advertise whether they update sk_buff md_dst during offloads" This reverts commit 6536f12fe2ddaa134b9ffa47b115256bd1302a6e which is commit 475747a19316b08e856c666a20503e73d7ed67ed upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I4f7a1f587c8d75573d5a5b3a091d2d6a5cedb639 Signed-off-by: Greg Kroah-Hartman --- include/net/macsec.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/macsec.h b/include/net/macsec.h index 312522ad834a..5b9c61c4d3a6 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -301,7 +301,6 @@ struct macsec_ops { int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx); - bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa);