From 38433aa31b4364ab5949c16fa34ea077074d8694 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Apr 2025 11:05:54 +0300 Subject: [PATCH 001/114] dm: add missing unlock on in dm_keyslot_evict() commit 650266ac4c7230c89bcd1307acf5c9c92cfa85e2 upstream. We need to call dm_put_live_table() even if dm_get_live_table() returns NULL. Fixes: 9355a9eb21a5 ("dm: support key eviction from keyslot managers of underlying devices") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Dan Carpenter Signed-off-by: Mikulas Patocka Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 319bd10548e9..7a33da2dd64b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1242,7 +1242,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, t = dm_get_live_table(md, &srcu_idx); if (!t) - return 0; + goto put_live_table; for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1253,6 +1253,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, (void *)key); } +put_live_table: dm_put_live_table(md, srcu_idx); return 0; } From 0efe9965680b009f4bf7df6315b165d6bff103b9 Mon Sep 17 00:00:00 2001 From: Wojciech Dubowik Date: Thu, 24 Apr 2025 11:59:14 +0200 Subject: [PATCH 002/114] arm64: dts: imx8mm-verdin: Link reg_usdhc2_vqmmc to usdhc2 commit 5591ce0069ddda97cdbbea596bed53e698f399c2 upstream. Define vqmmc regulator-gpio for usdhc2 with vin-supply coming from LDO5. Without this definition LDO5 will be powered down, disabling SD card after bootup. This has been introduced in commit f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5"). Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Fixes: f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5") Tested-by: Manuel Traut Reviewed-by: Philippe Schenker Tested-by: Francesco Dolcini Reviewed-by: Francesco Dolcini Cc: stable@vger.kernel.org Signed-off-by: Wojciech Dubowik Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- .../boot/dts/freescale/imx8mm-verdin.dtsi | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 6c48fa4b0d0c..6457d2c37701 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -148,6 +148,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <®_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -266,7 +279,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -787,6 +800,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <®_usdhc2_vqmmc>; }; &wdog1 { @@ -1209,13 +1223,17 @@ ; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + ; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. */ pinctrl_usdhc2: usdhc2grp { fsl,pins = - , , /* SODIMM 78 */ , /* SODIMM 74 */ , /* SODIMM 80 */ @@ -1226,7 +1244,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - , , , , @@ -1237,7 +1254,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - , , , , @@ -1249,7 +1265,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - , , , , From d2520dc79c2a06c0ee78f7fc5a0b4e21c06ef403 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:46 +0200 Subject: [PATCH 003/114] can: mcan: m_can_class_unregister(): fix order of unregistration calls commit 0713a1b3276b98c7dafbeefef00d7bc3a9119a84 upstream. If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-3-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2a258986eed0..ba7f7de25c85 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2125,9 +2125,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); From 73dde269a1a43e6b1aa92eba13ad2df58bfdd38e Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 24 Apr 2025 18:01:42 +0530 Subject: [PATCH 004/114] wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation commit 023c1f2f0609218103cbcb48e0104b144d4a16dc upstream. Currently during the multi-link element defragmentation process, the multi-link element length added to the total IEs length when calculating the length of remaining IEs after the multi-link element in cfg80211_defrag_mle(). This could lead to out-of-bounds access if the multi-link element or its corresponding fragment elements are the last elements in the IEs buffer. To address this issue, correctly calculate the remaining IEs length by deducting the multi-link element end offset from total IEs end offset. Cc: stable@vger.kernel.org Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20250424-fix_mle_defragmentation_oob_access-v1-1-84412a1743fa@quicinc.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index ce622a287abc..6db8c9a2a7a2 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2511,7 +2511,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; From 3482037d0f66cc4ce40e0332fa94ac73b9915de1 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:44 +0200 Subject: [PATCH 005/114] can: mcp251xfd: mcp251xfd_remove(): fix order of unregistration calls commit 84f5eb833f53ae192baed4cfb8d9eaab43481fc9 upstream. If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. With the mcp251xfd driver the removal of the module causes the following warning: | WARNING: CPU: 0 PID: 352 at net/core/dev.c:7342 __netif_napi_del_locked+0xc8/0xd8 as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-1-59a9b131589d@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 6fecfe4cd080..a48996265172 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2179,8 +2179,8 @@ static void mcp251xfd_remove(struct spi_device *spi) struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); mcp251xfd_unregister(priv); + can_rx_offload_del(&priv->offload); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); } From 6ee551672c8cf36108b0cfba92ec0c7c28ac3439 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 30 Apr 2025 11:18:28 +0900 Subject: [PATCH 006/114] ksmbd: prevent rename with empty string commit 53e3e5babc0963a92d856a5ec0ce92c59f54bc12 upstream. Client can send empty newname string to ksmbd server. It will cause a kernel oops from d_alloc. This patch return the error when attempting to rename a file or directory with an empty new name string. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 13750a5e5ba0..9bd817427a34 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -632,6 +632,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); From 04c8a38c60346bb5a7c49b276de7233f703ce9cb Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Fri, 2 May 2025 08:21:58 +0900 Subject: [PATCH 007/114] ksmbd: prevent out-of-bounds stream writes by validating *pos commit 0ca6df4f40cf4c32487944aaf48319cb6c25accc upstream. ksmbd_vfs_stream_write() did not validate whether the write offset (*pos) was within the bounds of the existing stream data length (v_len). If *pos was greater than or equal to v_len, this could lead to an out-of-bounds memory write. This patch adds a check to ensure *pos is less than v_len before proceeding. If the condition fails, -EINVAL is returned. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index fa5b7e63eb83..f6616d687365 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -443,6 +443,13 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } + if (v_len <= *pos) { + pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", + *pos, v_len); + err = -EINVAL; + goto out; + } + if (v_len < size) { wbuf = kvzalloc(size, GFP_KERNEL); if (!wbuf) { From fec1f9e9a650e8e7011330a085c77e7bf2a08ea9 Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Tue, 6 May 2025 22:04:52 +0900 Subject: [PATCH 008/114] ksmbd: Fix UAF in __close_file_table_ids commit 36991c1ccde2d5a521577c448ffe07fcccfe104d upstream. A use-after-free is possible if one thread destroys the file via __ksmbd_close_fd while another thread holds a reference to it. The existing checks on fp->refcount are not sufficient to prevent this. The fix takes ft->lock around the section which removes the file from the file table. This prevents two threads acquiring the same file pointer via __close_file_table_ids, as well as the other functions which retrieve a file from the IDR and which already use this same lock. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs_cache.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 271a23abc82f..002f0864abee 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -644,21 +644,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } + + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } From 0236742bd959332181c1fcc41a05b7b709180501 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 May 2025 16:28:54 +0200 Subject: [PATCH 009/114] openvswitch: Fix unsafe attribute parsing in output_userspace() commit 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 upstream. This patch replaces the manual Netlink attribute iteration in output_userspace() with nla_for_each_nested(), which ensures that only well-formed attributes are processed. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Eelco Chaudron Acked-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 6c5afb4ad67b..10c646b32b9d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -959,8 +959,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; From af9e2d4732a548db8f6f5a90c2c20a789a3d7240 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Wed, 30 Apr 2025 11:16:23 +0800 Subject: [PATCH 010/114] ksmbd: fix memory leak in parse_lease_state() [ Upstream commit eb4447bcce915b43b691123118893fca4f372a8f ] The previous patch that added bounds check for create lease context introduced a memory leak. When the bounds check fails, the function returns NULL without freeing the previously allocated lease_ctx_info structure. This patch fixes the issue by adding kfree(lreq) before returning NULL in both boundary check cases. Fixes: bab703ed8472 ("ksmbd: add bounds check for create lease context") Signed-off-by: Wang Zhaolong Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 5a5277b4b53b..72294764d4c2 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1496,7 +1496,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1512,7 +1512,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1521,6 +1521,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** From 31ff70ad39485698cf779f2078132d80b57f6c07 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:54 -0700 Subject: [PATCH 011/114] sch_htb: make htb_deactivate() idempotent [ Upstream commit 3769478610135e82b262640252d90f6efb05be71 ] Alan reported a NULL pointer dereference in htb_next_rb_node() after we made htb_qlen_notify() idempotent. It turns out in the following case it introduced some regression: htb_dequeue_tree(): |-> fq_codel_dequeue() |-> qdisc_tree_reduce_backlog() |-> htb_qlen_notify() |-> htb_deactivate() |-> htb_next_rb_node() |-> htb_deactivate() For htb_next_rb_node(), after calling the 1st htb_deactivate(), the clprio[prio]->ptr could be already set to NULL, which means htb_next_rb_node() is vulnerable here. For htb_deactivate(), although we checked qlen before calling it, in case of qlen==0 after qdisc_tree_reduce_backlog(), we may call it again which triggers the warning inside. To fix the issues here, we need to: 1) Make htb_deactivate() idempotent, that is, simply return if we already call it before. 2) Make htb_next_rb_node() safe against ptr==NULL. Many thanks to Alan for testing and for the reproducer. Fixes: 5ba8b837b522 ("sch_htb: make htb_qlen_notify() idempotent") Reported-by: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_htb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9a3f7ea80b34..716da8c6b3de 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1485,8 +1486,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (!cl->prio_activity) - return; htb_deactivate(qdisc_priv(sch), cl); } @@ -1740,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1949,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { From fa1fe9f3ddb916f22fc0a2678ca0a5fb8750ec70 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:52 +0200 Subject: [PATCH 012/114] gre: Fix again IPv6 link-local address generation. [ Upstream commit 3e6a0243ff002ddbd7ee18a8974ae61d2e6ed00d ] Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Note: This patch was originally applied as commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). However, it was then reverted by commit fc486c2d060f ("Revert "gre: Fix IPv6 link-local address generation."") because it uncovered another bug that ended up breaking net/forwarding/ip6gre_custom_multipath_hash.sh. That other bug has now been fixed by commit 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop"). Therefore we can now revive this GRE patch (no changes since original commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/a88cc5c4811af36007645d610c95102dccb360a6.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bb9add46e382..231fa4dc6cde 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3189,16 +3189,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3508,7 +3505,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From 370635397b623f5519a3b01c58954b447f16555c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Sep 2024 17:47:45 +0200 Subject: [PATCH 013/114] netdevice: add netdev_tx_reset_subqueue() shorthand [ Upstream commit 3dc95a3edd0a86b4a59670b3fafcc64c7d83e2e7 ] Add a shorthand similar to other net*_subqueue() helpers for resetting the queue by its index w/o obtaining &netdev_tx_queue beforehand manually. Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen Stable-dep-of: 4db6c75124d8 ("net: ethernet: mtk_eth_soc: reset all TX queues on DMA free") Signed-off-by: Sasha Levin --- include/linux/netdevice.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 337a9d1c558f..0b0a172337db 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3614,6 +3614,17 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) #endif } +/** + * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue + * @dev: network device + * @qid: stack index of the queue to reset + */ +static inline void netdev_tx_reset_subqueue(const struct net_device *dev, + u32 qid) +{ + netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid)); +} + /** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device @@ -3623,7 +3634,7 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) */ static inline void netdev_reset_queue(struct net_device *dev_queue) { - netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); + netdev_tx_reset_subqueue(dev_queue, 0); } /** From 68f29bb97a0e0519156664cdc23e8b1f129e3254 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 5 May 2025 02:07:32 +0100 Subject: [PATCH 014/114] net: ethernet: mtk_eth_soc: reset all TX queues on DMA free [ Upstream commit 4db6c75124d871fbabf8243f947d34cc7e0697fc ] The purpose of resetting the TX queue is to reset the byte and packet count as well as to clear the software flow control XOFF bit. MediaTek developers pointed out that netdev_reset_queue would only resets queue 0 of the network device. Queues that are not reset may cause unexpected issues. Packets may stop being sent after reset and "transmit timeout" log may be displayed. Import fix from MediaTek's SDK to resolve this issue. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/319c0d9905579a46dc448579f892f364f1f84818 Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c9ff9adceac4f152239a0f65c397f13547639175.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d2ec8f642c2f..c6ccfbd42265 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3117,11 +3117,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->txrx.txd_size, From 3455e6394fef9d5cefd3d3180469ed651942a5e8 Mon Sep 17 00:00:00 2001 From: Kelsey Maes Date: Wed, 30 Apr 2025 09:15:01 -0700 Subject: [PATCH 015/114] can: mcp251xfd: fix TDC setting for low data bit rates [ Upstream commit 5e1663810e11c64956aa7e280cf74b2f3284d816 ] The TDC is currently hardcoded enabled. This means that even for lower CAN-FD data bitrates (with a DBRP (data bitrate prescaler) > 2) a TDC is configured. This leads to a bus-off condition. ISO 11898-1 section 11.3.3 says "Transmitter delay compensation" (TDC) is only applicable if DBRP is 1 or 2. To fix the problem, switch the driver to use the TDC calculation provided by the CAN driver framework (which respects ISO 11898-1 section 11.3.3). This has the positive side effect that userspace can control TDC as needed. Demonstration of the feature in action: | $ ip link set can0 up type can bitrate 125000 dbitrate 500000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 125000 sample-point 0.875 | tq 50 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 2 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 500000 dsample-point 0.875 | dtq 125 dprop-seg 6 dphase-seg1 7 dphase-seg2 2 dsjw 1 dbrp 5 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 | $ ip link set can0 up type can bitrate 1000000 dbitrate 4000000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 1000000 sample-point 0.750 | tq 25 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 5 brp 1 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 4000000 dsample-point 0.700 | dtq 25 dprop-seg 3 dphase-seg1 3 dphase-seg2 3 dsjw 1 dbrp 1 | tdco 7 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 There has been some confusion about the MCP2518FD using a relative or absolute TDCO due to the datasheet specifying a range of [-64,63]. I have a custom board with a 40 MHz clock and an estimated loop delay of 100 to 216 ns. During testing at a data bit rate of 4 Mbit/s I found that using can_get_relative_tdco() resulted in bus-off errors. The final TDCO value was 1 which corresponds to a 10% SSP in an absolute configuration. This behavior is expected if the TDCO value is really absolute and not relative. Using priv->can.tdc.tdco instead results in a final TDCO of 8, setting the SSP at exactly 80%. This configuration works. The automatic, manual, and off TDC modes were tested at speeds up to, and including, 8 Mbit/s on real hardware and behave as expected. Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Reported-by: Kelsey Maes Closes: https://lore.kernel.org/all/C2121586-C87F-4B23-A933-845362C29CA1@vpprocess.com Reviewed-by: Vincent Mailhol Signed-off-by: Kelsey Maes Link: https://patch.msgid.link/20250430161501.79370-1-kelsey@vpprocess.com [mkl: add comment] Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index a48996265172..21ae3a89924e 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -75,6 +75,24 @@ static const struct can_bittiming_const mcp251xfd_data_bittiming_const = { .brp_inc = 1, }; +/* The datasheet of the mcp2518fd (DS20006027B) specifies a range of + * [-64,63] for TDCO, indicating a relative TDCO. + * + * Manual tests have shown, that using a relative TDCO configuration + * results in bus off, while an absolute configuration works. + * + * For TDCO use the max value (63) from the data sheet, but 0 as the + * minimum. + */ +static const struct can_tdc_const mcp251xfd_tdc_const = { + .tdcv_min = 0, + .tdcv_max = 63, + .tdco_min = 0, + .tdco_max = 63, + .tdcf_min = 0, + .tdcf_max = 0, +}; + static const char *__mcp251xfd_get_model_str(enum mcp251xfd_model model) { switch (model) { @@ -510,8 +528,7 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) { const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u32 val = 0; - s8 tdco; + u32 tdcmod, val = 0; int err; /* CAN Control Register @@ -575,11 +592,16 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) return err; /* Transmitter Delay Compensation */ - tdco = clamp_t(int, dbt->brp * (dbt->prop_seg + dbt->phase_seg1), - -64, 63); - val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, - MCP251XFD_REG_TDC_TDCMOD_AUTO) | - FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, tdco); + if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_AUTO) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_AUTO; + else if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_MANUAL) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_MANUAL; + else + tdcmod = MCP251XFD_REG_TDC_TDCMOD_DISABLED; + + val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, tdcmod) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCV_MASK, priv->can.tdc.tdcv) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, priv->can.tdc.tdco); return regmap_write(priv->map_reg, MCP251XFD_REG_TDC, val); } @@ -2083,10 +2105,12 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = &mcp251xfd_bittiming_const; priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; + priv->can.tdc_const = &mcp251xfd_tdc_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | - CAN_CTRLMODE_CC_LEN8_DLC; + CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO | + CAN_CTRLMODE_TDC_MANUAL; set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); priv->ndev = ndev; priv->spi = spi; From 42b7a7c962b2e65676fdd8acda4bc78349ce0ef0 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 29 Apr 2025 09:05:55 +0200 Subject: [PATCH 016/114] can: gw: fix RCU/BH usage in cgw_create_job() [ Upstream commit 511e64e13d8cc72853275832e3f372607466c18c ] As reported by Sebastian Andrzej Siewior the use of local_bh_disable() is only feasible in uni processor systems to update the modification rules. The usual use-case to update the modification rules is to update the data of the modifications but not the modification types (AND/OR/XOR/SET) or the checksum functions itself. To omit additional memory allocations to maintain fast modification switching times, the modification description space is doubled at gw-job creation time so that only the reference to the active modification description is changed under rcu protection. Rename cgw_job::mod to cf_mod and make it a RCU pointer. Allocate in cgw_create_job() and free it together with cgw_job in cgw_job_free_rcu(). Update all users to dereference cgw_job::cf_mod with a RCU accessor and if possible once. [bigeasy: Replace mod1/mod2 from the Oliver's original patch with dynamic allocation, use RCU annotation and accessor] Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/ Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs") Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250429070555.cs-7b_eZ@linutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/gw.c | 151 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 60 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index 37528826935e..e65500c52bf5 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? */ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); - if (err < 0) - return err; + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; - if (mod.uid) { + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + if (err < 0) + goto out_free_cf; + + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } From adbc8cc1162951cb152ed7f147d5fbd35ce3e62f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 3 May 2025 01:01:18 +0300 Subject: [PATCH 017/114] ipvs: fix uninit-value for saddr in do_output_route4 [ Upstream commit e34090d7214e0516eb8722aee295cb2507317c07 ] syzbot reports for uninit-value for the saddr argument [1]. commit 4754957f04f5 ("ipvs: do not use random local source address for tunnels") already implies that the input value of saddr should be ignored but the code is still reading it which can prevent to connect the route. Fix it by changing the argument to ret_saddr. [1] BUG: KMSAN: uninit-value in do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 __ip_vs_get_out_rt+0x403/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:330 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4167 [inline] slab_alloc_node mm/slub.c:4210 [inline] __kmalloc_cache_noprof+0x8fa/0xe00 mm/slub.c:4367 kmalloc_noprof include/linux/slab.h:905 [inline] ip_vs_dest_dst_alloc net/netfilter/ipvs/ip_vs_xmit.c:61 [inline] __ip_vs_get_out_rt+0x35d/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:323 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e CPU: 0 UID: 0 PID: 22408 Comm: syz.4.5165 Not tainted 6.15.0-rc3-syzkaller-00019-gbc3372351d0c #0 PREEMPT(undef) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Reported-by: syzbot+04b9a82855c8aed20860@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68138dfa.050a0220.14dd7d.0017.GAE@google.com/ Fixes: 4754957f04f5 ("ipvs: do not use random local source address for tunnels") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_xmit.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 5cd511162bc0..0103c4a4d10a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) return false; } -/* Get route to daddr, update *saddr, optionally bind route to saddr */ +/* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - int rt_mode, __be32 *saddr) + int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; - bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { - /* Invalid saddr ? */ - if (PTR_ERR(rt) == -EINVAL && *saddr && - rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { - *saddr = 0; - flowi4_update_output(&fl4, 0, daddr, 0); - goto retry; - } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; - } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + } + if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); - *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, daddr, fl4.saddr); - loop = true; + rt_mode = 0; goto retry; } - *saddr = fl4.saddr; + if (ret_saddr) + *ret_saddr = fl4.saddr; return rt; } @@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { - __be32 saddr = htonl(INADDR_ANY); - noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; - if (ret_saddr) - *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; From a3dfec485401943e315c394c29afe2db8f9481d6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 7 May 2025 17:01:59 +0200 Subject: [PATCH 018/114] netfilter: ipset: fix region locking in hash types [ Upstream commit 8478a729c0462273188263136880480729e9efca ] Region locking introduced in v5.6-rc4 contained three macros to handle the region locks: ahash_bucket_start(), ahash_bucket_end() which gave back the start and end hash bucket values belonging to a given region lock and ahash_region() which should give back the region lock belonging to a given hash bucket. The latter was incorrect which can lead to a race condition between the garbage collector and adding new elements when a hash type of set is defined with timeouts. Fixes: f66ee0410b1c ("netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports") Reported-by: Kota Toda Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 20aad81fcad7..c2d88b1b06b8 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -63,7 +63,7 @@ struct hbucket { #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ - ((n) % ahash_numof_locks(htable_bits)) + ((n) / jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) From 355b0526336c0bf2bf7feaca033568ede524f763 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:04 +0200 Subject: [PATCH 019/114] bpf: Scrub packet on bpf_redirect_peer [ Upstream commit c4327229948879814229b46aa26a750718888503 ] When bpf_redirect_peer is used to redirect packets to a device in another network namespace, the skb isn't scrubbed. That can lead skb information from one namespace to be "misused" in another namespace. As one example, this is causing Cilium to drop traffic when using bpf_redirect_peer to redirect packets that just went through IPsec decryption to a container namespace. The following pwru trace shows (1) the packet path from the host's XFRM layer to the container's XFRM layer where it's dropped and (2) the number of active skb extensions at each function. NETNS MARK IFACE TUPLE FUNC 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm4_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 gro_cells_receive .active_extensions = (__u8)2, [...] 4026533547 0 eth0 10.244.3.124:35473->10.244.2.158:53 skb_do_redirect .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv_core .active_extensions = (__u8)2, [...] 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 udp_queue_rcv_one_skb .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_policy_check .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 security_xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 kfree_skb_reason(SKB_DROP_REASON_XFRM_POLICY) .active_extensions = (__u8)2, In this case, there are no XFRM policies in the container's network namespace so the drop is unexpected. When we decrypt the IPsec packet, the XFRM state used for decryption is set in the skb extensions. This information is preserved across the netns switch. When we reach the XFRM policy check in the container's netns, __xfrm_policy_check drops the packet with LINUX_MIB_XFRMINNOPOLS because a (container-side) XFRM policy can't be found that matches the (host-side) XFRM state used for decryption. This patch fixes this by scrubbing the packet when using bpf_redirect_peer, as is done on typical netns switches via veth devices except skb->mark and skb->tstamp are not zeroed. Fixes: 9aa1206e8f482 ("bpf: Add redirect_peer helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/1728ead5e0fe45e7a6542c36bd4e3ca07a73b7d6.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 066277b91a1b..5143c8a9e52c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2507,6 +2507,7 @@ int skb_do_redirect(struct sk_buff *skb) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); + skb_scrub_packet(skb, false); return -EAGAIN; } return flags & BPF_F_NEIGH ? From 0121c19ebd53af5a2d634d8be904060ca95af23e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:00 +0200 Subject: [PATCH 020/114] net: dsa: b53: allow leaky reserved multicast [ Upstream commit 5f93185a757ff38b36f849c659aeef368db15a68 ] Allow reserved multicast to ignore VLAN membership so STP and other management protocols work without a PVID VLAN configured when using a vlan aware bridge. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index cfcda893f1a1..c438a5d9f6d8 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -373,9 +373,11 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, b53_read8(dev, B53_VLAN_PAGE, B53_VLAN_CTRL5, &vc5); } + vc1 &= ~VC1_RX_MCST_FWD_EN; + if (enable) { vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID; - vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN; + vc1 |= VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; if (enable_filtering) { vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; @@ -393,7 +395,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, } else { vc0 &= ~(VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID); - vc1 &= ~(VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN); + vc1 &= ~VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; vc5 &= ~VC5_DROP_VTABLE_MISS; From aa00a30a28c7aae05f0955c03000f2b8b4596105 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:02 +0200 Subject: [PATCH 021/114] net: dsa: b53: fix clearing PVID of a port [ Upstream commit f480851981043d9bb6447ca9883ade9247b9a0ad ] Currently the PVID of ports are only set when adding/updating VLANs with PVID set or removing VLANs, but not when clearing the PVID flag of a VLAN. E.g. the following flow $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master bridge $ bridge vlan add dev sw1p1 vid 10 pvid untagged $ bridge vlan add dev sw1p1 vid 10 untagged Would keep the PVID set as 10, despite the flag being cleared. Fix this by checking if we need to unset the PVID on vlan updates. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c438a5d9f6d8..584de37a61c7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1521,12 +1521,21 @@ int b53_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; + u16 old_pvid, new_pvid; int err; err = b53_vlan_prepare(ds, port, vlan); if (err) return err; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + if (pvid) + new_pvid = vlan->vid; + else if (!pvid && vlan->vid == old_pvid) + new_pvid = b53_default_pvid(dev); + else + new_pvid = old_pvid; + vl = &dev->vlans[vlan->vid]; b53_get_vlan_entry(dev, vlan->vid, vl); @@ -1543,9 +1552,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); - if (pvid && !dsa_is_cpu_port(ds, port)) { + if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), - vlan->vid); + new_pvid); b53_fast_age_vlan(dev, vlan->vid); } From a143c39add812fb0a54955c89866cb5ae51462b0 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:03 +0200 Subject: [PATCH 022/114] net: dsa: b53: fix flushing old pvid VLAN on pvid change [ Upstream commit 083c6b28c0cbcd83b6af1a10f2c82937129b3438 ] Presumably the intention here was to flush the VLAN of the old pvid, not the added VLAN again, which we already flushed before. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-5-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 584de37a61c7..55893d4e405e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1555,7 +1555,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), new_pvid); - b53_fast_age_vlan(dev, vlan->vid); + b53_fast_age_vlan(dev, old_pvid); } return 0; From ca071649e29140f050fb42246c474639cb6146a7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:04 +0200 Subject: [PATCH 023/114] net: dsa: b53: fix VLAN ID for untagged vlan on bridge leave [ Upstream commit a1c1901c5cc881425cc45992ab6c5418174e9e5a ] The untagged default VLAN is added to the default vlan, which may be one, but we modify the VLAN 0 entry on bridge leave. Fix this to use the correct VLAN entry for the default pvid. Fixes: fea83353177a ("net: dsa: b53: Fix default VLAN ID") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-6-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 55893d4e405e..b257757f0b9d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1967,7 +1967,7 @@ EXPORT_SYMBOL(b53_br_join); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { struct b53_device *dev = ds->priv; - struct b53_vlan *vl = &dev->vlans[0]; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; @@ -1993,6 +1993,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) dev->ports[port].vlan_ctl_mask = pvlan; pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { From b1c9c58d09ed1eae1b06889cdda21000127a5e27 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:05 +0200 Subject: [PATCH 024/114] net: dsa: b53: always rejoin default untagged VLAN on bridge leave [ Upstream commit 13b152ae40495966501697693f048f47430c50fd ] While JOIN_ALL_VLAN allows to join all VLANs, we still need to keep the default VLAN enabled so that untagged traffic stays untagged. So rejoin the default VLAN even for switches with JOIN_ALL_VLAN support. Fixes: 48aea33a77ab ("net: dsa: b53: Add JOIN_ALL_VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-7-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b257757f0b9d..aa449fa18268 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2002,12 +2002,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) if (!(reg & BIT(cpu_port))) reg |= BIT(cpu_port); b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } else { - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); } + + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); } EXPORT_SYMBOL(b53_br_leave); From ce4e826dbfe788abda09e1c790168ba4c0fdff0b Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:09 +0200 Subject: [PATCH 025/114] net: dsa: b53: fix learning on VLAN unaware bridges [ Upstream commit 9f34ad89bcf0e6df6f8b01f1bdab211493fc66d1 ] When VLAN filtering is off, we configure the switch to forward, but not learn on VLAN table misses. This effectively disables learning while not filtering. Fix this by switching to forward and learn. Setting the learning disable register will still control whether learning actually happens. Fixes: dad8d7c6452b ("net: dsa: b53: Properly account for VLAN filtering") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-11-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index aa449fa18268..d2ff2c2fcbbf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -383,7 +383,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; vc5 |= VC5_DROP_VTABLE_MISS; } else { - vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S; + vc4 |= VC4_NO_ING_VID_CHK << VC4_ING_VID_CHECK_S; vc5 &= ~VC5_DROP_VTABLE_MISS; } From dadbe33fee36484bbf1d6b5e5dc2f9cf27693173 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 10 Apr 2025 14:46:32 -0400 Subject: [PATCH 026/114] Input: cyttsp5 - ensure minimum reset pulse width commit c6cb8bf79466ae66bd0d07338c7c505ce758e9d7 upstream. The current reset pulse width is measured to be 5us on a Renesas RZ/G2L SOM. The manufacturer's minimum reset pulse width is specified as 10us. Extend reset pulse width to make sure it is long enough on all platforms. Also reword confusing comments about reset pin assertion. Fixes: 5b0c03e24a06 ("Input: Add driver for Cypress Generation 5 touchscreen") Cc: stable@vger.kernel.org Acked-by: Alistair Francis Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20250410184633.1164837-1-hugo@hugovil.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/cyttsp5.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index db5a885ecd72..6c9bd1f238fc 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -865,13 +865,16 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq, ts->input->phys = ts->phys; input_set_drvdata(ts->input, ts); - /* Reset the gpio to be in a reset state */ + /* Assert gpio to be in a reset state */ ts->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(ts->reset_gpio)) { error = PTR_ERR(ts->reset_gpio); dev_err(dev, "Failed to request reset gpio, error %d\n", error); return error; } + + fsleep(10); /* Ensure long-enough reset pulse (minimum 10us). */ + gpiod_set_value_cansleep(ts->reset_gpio, 0); /* Need a delay to have device up */ From f72f0172079c015aa1841b66c10621bc4cef3b65 Mon Sep 17 00:00:00 2001 From: Mikael Gonella-Bolduc Date: Wed, 23 Apr 2025 09:52:43 -0400 Subject: [PATCH 027/114] Input: cyttsp5 - fix power control issue on wakeup commit 7675b5efd81fe6d524e29d5a541f43201e98afa8 upstream. The power control function ignores the "on" argument when setting the report ID, and thus is always sending HID_POWER_SLEEP. This causes a problem when trying to wakeup. Fix by sending the state variable, which contains the proper HID_POWER_ON or HID_POWER_SLEEP based on the "on" argument. Fixes: 3c98b8dbdced ("Input: cyttsp5 - implement proper sleep and wakeup procedures") Cc: stable@vger.kernel.org Signed-off-by: Mikael Gonella-Bolduc Signed-off-by: Hugo Villeneuve Reviewed-by: Alistair Francis Link: https://lore.kernel.org/r/20250423135243.1261460-1-hugo@hugovil.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/cyttsp5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index 6c9bd1f238fc..a74b34d8df2a 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -580,7 +580,7 @@ static int cyttsp5_power_control(struct cyttsp5 *ts, bool on) int rc; SET_CMD_REPORT_TYPE(cmd[0], 0); - SET_CMD_REPORT_ID(cmd[0], HID_POWER_SLEEP); + SET_CMD_REPORT_ID(cmd[0], state); SET_CMD_OPCODE(cmd[1], HID_CMD_SET_POWER); rc = cyttsp5_write(ts, HID_COMMAND_REG, cmd, sizeof(cmd)); From 90fa6015ff83ef1c373cc61b7c924ab2bcbe1801 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Tue, 29 Apr 2025 09:16:29 -0700 Subject: [PATCH 028/114] Input: mtk-pmic-keys - fix possible null pointer dereference commit 11cdb506d0fbf5ac05bf55f5afcb3a215c316490 upstream. In mtk_pmic_keys_probe, the regs parameter is only set if the button is parsed in the device tree. However, on hardware where the button is left floating, that node will most likely be removed not to enable that input. In that case the code will try to dereference a null pointer. Let's use the regs struct instead as it is defined for all supported platforms. Note that it is ok setting the key reg even if that latter is disabled as the interrupt won't be enabled anyway. Fixes: b581acb49aec ("Input: mtk-pmic-keys - transfer per-key bit in mtk_pmic_keys_regs") Signed-off-by: Gary Bisson Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/mtk-pmic-keys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 4364c3401ff1..486ca8ff86f8 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -147,8 +147,8 @@ static void mtk_pmic_keys_lp_reset_setup(struct mtk_pmic_keys *keys, u32 value, mask; int error; - kregs_home = keys->keys[MTK_PMIC_HOMEKEY_INDEX].regs; - kregs_pwr = keys->keys[MTK_PMIC_PWRKEY_INDEX].regs; + kregs_home = ®s->keys_regs[MTK_PMIC_HOMEKEY_INDEX]; + kregs_pwr = ®s->keys_regs[MTK_PMIC_PWRKEY_INDEX]; error = of_property_read_u32(keys->dev->of_node, "power-off-time-sec", &long_press_debounce); From cbc82e7db16d59c301457312a624a7de2c03cd4a Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 22:59:25 -0700 Subject: [PATCH 029/114] Input: xpad - fix Share button on Xbox One controllers commit 4ef46367073b107ec22f46fe5f12176e87c238e8 upstream. The Share button, if present, is always one of two offsets from the end of the file, depending on the presence of a specific interface. As we lack parsing for the identify packet we can't automatically determine the presence of that interface, but we can hardcode which of these offsets is correct for a given controller. More controllers are probably fixable by adding the MAP_SHARE_BUTTON in the future, but for now I only added the ones that I have the ability to test directly. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-2-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index b91467c8e6c4..1ee2bafe99a4 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -77,12 +77,13 @@ * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ -#define MAP_DPAD_TO_BUTTONS (1 << 0) -#define MAP_TRIGGERS_TO_BUTTONS (1 << 1) -#define MAP_STICKS_TO_NULL (1 << 2) -#define MAP_SELECT_BUTTON (1 << 3) -#define MAP_PADDLES (1 << 4) -#define MAP_PROFILE_BUTTON (1 << 5) +#define MAP_DPAD_TO_BUTTONS BIT(0) +#define MAP_TRIGGERS_TO_BUTTONS BIT(1) +#define MAP_STICKS_TO_NULL BIT(2) +#define MAP_SHARE_BUTTON BIT(3) +#define MAP_PADDLES BIT(4) +#define MAP_PROFILE_BUTTON BIT(5) +#define MAP_SHARE_OFFSET BIT(6) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) @@ -135,7 +136,7 @@ static const struct xpad_device { { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, - { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ + { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, @@ -159,7 +160,7 @@ static const struct xpad_device { { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, - { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, + { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SHARE_BUTTON | MAP_SHARE_OFFSET, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, @@ -211,7 +212,7 @@ static const struct xpad_device { { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, - { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, + { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, @@ -389,7 +390,7 @@ static const struct xpad_device { { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, - { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, + { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -1025,7 +1026,7 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ -static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) +static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data, u32 len) { struct input_dev *dev = xpad->dev; bool do_sync = false; @@ -1066,8 +1067,12 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); - if (xpad->mapping & MAP_SELECT_BUTTON) - input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); + if (xpad->mapping & MAP_SHARE_BUTTON) { + if (xpad->mapping & MAP_SHARE_OFFSET) + input_report_key(dev, KEY_RECORD, data[len - 26] & BIT(0)); + else + input_report_key(dev, KEY_RECORD, data[len - 18] & BIT(0)); + } /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); @@ -1215,7 +1220,7 @@ static void xpad_irq_in(struct urb *urb) xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: - xpadone_process_packet(xpad, 0, xpad->idata); + xpadone_process_packet(xpad, 0, xpad->idata, urb->actual_length); break; default: xpad_process_packet(xpad, 0, xpad->idata); @@ -1972,7 +1977,7 @@ static int xpad_init_input(struct usb_xpad *xpad) xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); - if (xpad->mapping & MAP_SELECT_BUTTON) + if (xpad->mapping & MAP_SHARE_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) From 56b4e8b62124a4fa5b55a2c55881e0e2117b0334 Mon Sep 17 00:00:00 2001 From: Lode Willems Date: Tue, 22 Apr 2025 13:24:27 +0200 Subject: [PATCH 030/114] Input: xpad - add support for 8BitDo Ultimate 2 Wireless Controller commit 22cd66a5db56a07d9e621367cb4d16ff0f6baf56 upstream. This patch adds support for the 8BitDo Ultimate 2 Wireless Controller. Tested using the wireless dongle and plugged in. Signed-off-by: Lode Willems Link: https://lore.kernel.org/r/20250422112457.6728-1-me@lodewillems.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 1ee2bafe99a4..08602a0e21f8 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -387,6 +387,7 @@ static const struct xpad_device { { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x310b, "8BitDo Ultimate 2 Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, From 2d9d6a4cd3be9c4751c7e91ec2c34d35454b013f Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Fri, 28 Mar 2025 16:43:36 -0700 Subject: [PATCH 031/114] Input: xpad - fix two controller table values commit d05a424bea9aa3435009d5c462055008cc1545d8 upstream. Two controllers -- Mad Catz JOYTECH NEO SE Advanced and PDP Mirror's Edge Official -- were missing the value of the mapping field, and thus wouldn't detect properly. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-1-vi@endrift.com Fixes: 540602a43ae5 ("Input: xpad - add a few new VID/PID combinations") Fixes: 3492321e2e60 ("Input: xpad - add multiple supported devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 08602a0e21f8..c65321964131 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -206,7 +206,7 @@ static const struct xpad_device { { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, - { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, + { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, @@ -241,7 +241,7 @@ static const struct xpad_device { { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, - { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", XTYPE_XBOXONE }, + { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, From c321045f088c424464e6e1a18c8cbb5a88f158cc Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Wed, 7 May 2025 12:05:26 -0700 Subject: [PATCH 032/114] Input: synaptics - enable InterTouch on Dynabook Portege X30-D commit 6d7ea0881000966607772451b789b5fb5766f11d upstream. [ 5.989588] psmouse serio1: synaptics: Your touchpad (PNP: TOS0213 PNP0f03) says it can support a different bus. If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org. [ 6.039923] psmouse serio1: synaptics: Touchpad model: 1, fw: 9.32, id: 0x1e2a1, caps: 0xf00223/0x840300/0x12e800/0x52d884, board id: 3322, fw id: 2658004 The board is labelled TM3322. Present on the Toshiba / Dynabook Portege X30-D and possibly others. Confirmed working well with psmouse.synaptics_intertouch=1 and local build. Signed-off-by: Manuel Fombuena Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB9597711E7933A08389FEC31DB888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 26677432ac83..b2902a495ee5 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -194,6 +194,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS0213", /* Dynabook Portege X30-D */ NULL }; From 82b02402eee1aa3da874a5a1ead06d6ce1c062cf Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:06:32 -0700 Subject: [PATCH 033/114] Input: synaptics - enable InterTouch on Dynabook Portege X30L-G commit 47d768b32e644b56901bb4bbbdb1feb01ea86c85 upstream. Enable InterTouch mode on Dynabook Portege X30L-G by adding "TOS01f6" to the list of SMBus-enabled variants. Reported-by: Xuntao Chi Tested-by: Xuntao Chi Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB959786E4AC797160CDA93012B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index b2902a495ee5..5623aa0f4a44 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -194,6 +194,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS01f6", /* Dynabook Portege X30L-G */ "TOS0213", /* Dynabook Portege X30-D */ NULL }; From cbd085c424dbdd4af573c1620a45a178457c70f5 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:12:15 -0700 Subject: [PATCH 034/114] Input: synaptics - enable InterTouch on Dell Precision M3800 commit a609cb4cc07aa9ab8f50466622814356c06f2c17 upstream. Enable InterTouch mode on Dell Precision M3800 by adding "DLL060d" to the list of SMBus-enabled variants. Reported-by: Markus Rathgeb Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB959789DD6D574E16141E5DC4B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 5623aa0f4a44..387fe0e7c783 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -163,6 +163,7 @@ static const char * const topbuttonpad_pnp_ids[] = { static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ + "DLL060d", /* Dell Precision M3800 */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN0049", /* Yoga 11e */ From 85961bb7e4943ef1ccb9eebe707bfe369b429069 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 7 May 2025 14:52:55 -0700 Subject: [PATCH 035/114] Input: synaptics - enable SMBus for HP Elitebook 850 G1 commit f04f03d3e99bc8f89b6af5debf07ff67d961bc23 upstream. The kernel reports that the touchpad for this device can support SMBus mode. Reported-by: jt Link: https://lore.kernel.org/r/iys5dbv3ldddsgobfkxldazxyp54kay4bozzmagga6emy45jop@2ebvuxgaui4u Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 387fe0e7c783..8d22854900b2 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ From f7adc49438530bf140ad8a90d8701abbc7800e74 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:09:00 -0700 Subject: [PATCH 036/114] Input: synaptics - enable InterTouch on TUXEDO InfinityBook Pro 14 v5 commit 2abc698ac77314e0de5b33a6d96a39c5159d88e4 upstream. Enable InterTouch mode on TUXEDO InfinityBook Pro 14 v5 by adding "SYN1221" to the list of SMBus-enabled variants. Add support for InterTouch on SYN1221 by adding it to the list of SMBus-enabled variants. Reported-by: Matthias Eilert Tested-by: Matthias Eilert Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB9597C033C4BC20EE2A0C4543B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 8d22854900b2..3ca6642601c7 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN1221", /* TUXEDO InfinityBook Pro 14 v5 */ "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ From 90db122533d29357233c01401e53782d13424f27 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Mon, 14 Apr 2025 11:40:49 -0400 Subject: [PATCH 037/114] staging: iio: adc: ad7816: Correct conditional logic for store mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2e922956277187655ed9bedf7b5c28906e51708f upstream. The mode setting logic in ad7816_store_mode was reversed due to incorrect handling of the strcmp return value. strcmp returns 0 on match, so the `if (strcmp(buf, "full"))` block executed when the input was not "full". This resulted in "full" setting the mode to AD7816_PD (power-down) and other inputs setting it to AD7816_FULL. Fix this by checking it against 0 to correctly check for "full" and "power-down", mapping them to AD7816_FULL and AD7816_PD respectively. Fixes: 7924425db04a ("staging: iio: adc: new driver for AD7816 devices") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Acked-by: Nuno Sá Link: https://lore.kernel.org/stable/20250414152920.467505-1-gshahrouzi%40gmail.com Link: https://patch.msgid.link/20250414154050.469482-1-gshahrouzi@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/ad7816.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7816.c b/drivers/staging/iio/adc/ad7816.c index 6c14d7bcdd67..081b17f49863 100644 --- a/drivers/staging/iio/adc/ad7816.c +++ b/drivers/staging/iio/adc/ad7816.c @@ -136,7 +136,7 @@ static ssize_t ad7816_store_mode(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct ad7816_chip_info *chip = iio_priv(indio_dev); - if (strcmp(buf, "full")) { + if (strcmp(buf, "full") == 0) { gpiod_set_value(chip->rdwr_pin, 1); chip->mode = AD7816_FULL; } else { From f4d6b9f413cb87a9833282066a7eb732a15a3442 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Fri, 18 Apr 2025 20:43:06 -0400 Subject: [PATCH 038/114] staging: axis-fifo: Remove hardware resets for user errors commit c6e8d85fafa7193613db37da29c0e8d6e2515b13 upstream. The axis-fifo driver performs a full hardware reset (via reset_ip_core()) in several error paths within the read and write functions. This reset flushes both TX and RX FIFOs and resets the AXI-Stream links. Allow the user to handle the error without causing hardware disruption or data loss in other FIFO paths. Fixes: 4a965c5f89de ("staging: add driver for Xilinx AXI-Stream FIFO v4.1 IP core") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Link: https://lore.kernel.org/r/20250419004306.669605-1-gshahrouzi@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/axis-fifo.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 727b956aa231..53de2a901d3d 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -398,16 +398,14 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, bytes_available = ioread32(fifo->base_addr + XLLF_RLR_OFFSET); if (!bytes_available) { - dev_err(fifo->dt_device, "received a packet of length 0 - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet of length 0\n"); ret = -EIO; goto end_unlock; } if (bytes_available > len) { - dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu) - fifo core will be reset\n", + dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu)\n", bytes_available, len); - reset_ip_core(fifo); ret = -EINVAL; goto end_unlock; } @@ -416,8 +414,7 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, /* this probably can't happen unless IP * registers were previously mishandled */ - dev_err(fifo->dt_device, "received a packet that isn't word-aligned - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet that isn't word-aligned\n"); ret = -EIO; goto end_unlock; } @@ -438,7 +435,6 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, if (copy_to_user(buf + copied * sizeof(u32), tmp_buf, copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } @@ -547,7 +543,6 @@ static ssize_t axis_fifo_write(struct file *f, const char __user *buf, if (copy_from_user(tmp_buf, buf + copied * sizeof(u32), copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } From 0f2b18c0760406d8f7cfa9fa2f7e07b02e95d2b5 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Fri, 18 Apr 2025 21:29:37 -0400 Subject: [PATCH 039/114] staging: axis-fifo: Correct handling of tx_fifo_depth for size validation commit 2ca34b508774aaa590fc3698a54204706ecca4ba upstream. Remove erroneous subtraction of 4 from the total FIFO depth read from device tree. The stored depth is for checking against total capacity, not initial vacancy. This prevented writes near the FIFO's full size. The check performed just before data transfer, which uses live reads of the TDFV register to determine current vacancy, correctly handles the initial Depth - 4 hardware state and subsequent FIFO fullness. Fixes: 4a965c5f89de ("staging: add driver for Xilinx AXI-Stream FIFO v4.1 IP core") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Link: https://lore.kernel.org/r/20250419012937.674924-1-gshahrouzi@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/axis-fifo.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 53de2a901d3d..f667b3b62f18 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -775,9 +775,6 @@ static int axis_fifo_parse_dt(struct axis_fifo *fifo) goto end; } - /* IP sets TDFV to fifo depth - 4 so we will do the same */ - fifo->tx_fifo_depth -= 4; - ret = get_dts_property(fifo, "xlnx,use-rx-data", &fifo->has_rx_fifo); if (ret) { dev_err(fifo->dt_device, "missing xlnx,use-rx-data property\n"); From d41072906abec8bb8e01ed16afefbaa558908c89 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 May 2025 15:41:32 -0700 Subject: [PATCH 040/114] x86/mm: Eliminate window where TLB flushes may be inadvertently skipped commit fea4e317f9e7e1f449ce90dedc27a2d2a95bee5a upstream. tl;dr: There is a window in the mm switching code where the new CR3 is set and the CPU should be getting TLB flushes for the new mm. But should_flush_tlb() has a bug and suppresses the flush. Fix it by widening the window where should_flush_tlb() sends an IPI. Long Version: === History === There were a few things leading up to this. First, updating mm_cpumask() was observed to be too expensive, so it was made lazier. But being lazy caused too many unnecessary IPIs to CPUs due to the now-lazy mm_cpumask(). So code was added to cull mm_cpumask() periodically[2]. But that culling was a bit too aggressive and skipped sending TLB flushes to CPUs that need them. So here we are again. === Problem === The too-aggressive code in should_flush_tlb() strikes in this window: // Turn on IPIs for this CPU/mm combination, but only // if should_flush_tlb() agrees: cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); load_new_mm_cr3(need_flush); // ^ After 'need_flush' is set to false, IPIs *MUST* // be sent to this CPU and not be ignored. this_cpu_write(cpu_tlbstate.loaded_mm, next); // ^ Not until this point does should_flush_tlb() // become true! should_flush_tlb() will suppress TLB flushes between load_new_mm_cr3() and writing to 'loaded_mm', which is a window where they should not be suppressed. Whoops. === Solution === Thankfully, the fuzzy "just about to write CR3" window is already marked with loaded_mm==LOADED_MM_SWITCHING. Simply checking for that state in should_flush_tlb() is sufficient to ensure that the CPU is targeted with an IPI. This will cause more TLB flush IPIs. But the window is relatively small and I do not expect this to cause any kind of measurable performance impact. Update the comment where LOADED_MM_SWITCHING is written since it grew yet another user. Peter Z also raised a concern that should_flush_tlb() might not observe 'loaded_mm' and 'is_lazy' in the same order that switch_mm_irqs_off() writes them. Add a barrier to ensure that they are observed in the order they are written. Signed-off-by: Dave Hansen Acked-by: Rik van Riel Link: https://lore.kernel.org/oe-lkp/202411282207.6bd28eae-lkp@intel.com/ [1] Fixes: 6db2526c1d69 ("x86/mm/tlb: Only trim the mm_cpumask once a second") [2] Reported-by: Stephen Dolan Cc: stable@vger.kernel.org Acked-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4872bb082b19..20ce6a1bd6c5 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -630,7 +630,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); - /* Let nmi_uaccess_okay() know that we're changing CR3. */ + /* + * Indicate that CR3 is about to change. nmi_uaccess_okay() + * and others are sensitive to the window where mm_cpumask(), + * CR3 and cpu_tlbstate.loaded_mm are not all in sync. + */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); barrier(); } @@ -900,8 +904,16 @@ done: static bool should_flush_tlb(int cpu, void *data) { + struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu); struct flush_tlb_info *info = data; + /* + * Order the 'loaded_mm' and 'is_lazy' against their + * write ordering in switch_mm_irqs_off(). Ensure + * 'is_lazy' is at least as new as 'loaded_mm'. + */ + smp_rmb(); + /* Lazy TLB will get flushed at the next context switch. */ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) return false; @@ -910,8 +922,15 @@ static bool should_flush_tlb(int cpu, void *data) if (!info->mm) return true; + /* + * While switching, the remote CPU could have state from + * either the prev or next mm. Assume the worst and flush. + */ + if (loaded_mm == LOADED_MM_SWITCHING) + return true; + /* The target mm is loaded, and the CPU is not lazy. */ - if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + if (loaded_mm == info->mm) return true; /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ From b229fa0d093c86491128d6380039f280e781f3eb Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 18 Apr 2025 16:31:59 +0800 Subject: [PATCH 041/114] drm/amd/display: Shift DMUB AUX reply command if necessary commit 5a3846648c0523fd850b7f0aec78c0139453ab8b upstream. [Why] Defined value of dmub AUX reply command field get updated but didn't adjust dm receiving side accordingly. [How] Check the received reply command value to see if it's updated version or not. Adjust it if necessary. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d5c9ade755a9afa210840708a12a8f44c0d532f4) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e6bc59053319..4c731b6c020c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11058,8 +11058,11 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; if (!payload->write && p_notify->aux_reply.length && (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { From badda05d6b99a1da5ee0d308fe496d1d00be3329 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 18 Apr 2025 20:37:53 +0200 Subject: [PATCH 042/114] iio: adc: ad7606: fix serial register access commit f083f8a21cc785ebe3a33f756a3fa3660611f8db upstream. Fix register read/write routine as per datasheet. When reading multiple consecutive registers, only the first one is read properly. This is due to missing chip select deassert and assert again between first and second 16bit transfer, as shown in the datasheet AD7606C-16, rev 0, figure 110. Fixes: f2a22e1e172f ("iio: adc: ad7606: Add support for software mode for ad7616") Reviewed-by: David Lechner Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250418-wip-bl-ad7606-fix-reg-access-v3-1-d5eeb440c738@baylibre.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7606_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7606_spi.c b/drivers/iio/adc/ad7606_spi.c index 287a0591533b..67c96572cecc 100644 --- a/drivers/iio/adc/ad7606_spi.c +++ b/drivers/iio/adc/ad7606_spi.c @@ -127,7 +127,7 @@ static int ad7606_spi_reg_read(struct ad7606_state *st, unsigned int addr) { .tx_buf = &st->d16[0], .len = 2, - .cs_change = 0, + .cs_change = 1, }, { .rx_buf = &st->d16[1], .len = 2, From 7748b1b27fbfdf73f1f719eda2afa2f5d719b4e8 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Wed, 12 Mar 2025 14:20:16 +0800 Subject: [PATCH 043/114] iio: adc: rockchip: Fix clock initialization sequence commit 839f81de397019f55161c5982d670ac19d836173 upstream. clock_set_rate should be executed after devm_clk_get_enabled. Fixes: 97ad10bb2901 ("iio: adc: rockchip_saradc: Make use of devm_clk_get_enabled") Signed-off-by: Simon Xue Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/20250312062016.137821-1-xxm@rock-chips.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/rockchip_saradc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 929cba215d99..7b4eb4a200df 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -485,15 +485,6 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (info->reset) rockchip_saradc_reset_controller(info->reset); - /* - * Use a default value for the converter clock. - * This may become user-configurable in the future. - */ - ret = clk_set_rate(info->clk, info->data->clk_rate); - if (ret < 0) - return dev_err_probe(&pdev->dev, ret, - "failed to set adc clk rate\n"); - ret = regulator_enable(info->vref); if (ret < 0) return dev_err_probe(&pdev->dev, ret, @@ -520,6 +511,14 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (IS_ERR(info->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(info->clk), "failed to get adc clock\n"); + /* + * Use a default value for the converter clock. + * This may become user-configurable in the future. + */ + ret = clk_set_rate(info->clk, info->data->clk_rate); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to set adc clk rate\n"); platform_set_drvdata(pdev, indio_dev); From 3413b87a9e17b46beea2f70bfd41c5a376eab4b7 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Mon, 21 Apr 2025 09:15:39 -0400 Subject: [PATCH 044/114] iio: adis16201: Correct inclinometer channel resolution commit 609bc31eca06c7408e6860d8b46311ebe45c1fef upstream. The inclinometer channels were previously defined with 14 realbits. However, the ADIS16201 datasheet states the resolution for these output channels is 12 bits (Page 14, text description; Page 15, table 7). Correct the realbits value to 12 to accurately reflect the hardware. Fixes: f7fe1d1dd5a5 ("staging: iio: new adis16201 driver") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250421131539.912966-1-gshahrouzi@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/adis16201.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c index d054721859b3..99b05548b7bd 100644 --- a/drivers/iio/accel/adis16201.c +++ b/drivers/iio/accel/adis16201.c @@ -211,9 +211,9 @@ static const struct iio_chan_spec adis16201_channels[] = { BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC_REG, ADIS16201_SCAN_AUX_ADC, 0, 12), ADIS_INCLI_CHAN(X, ADIS16201_XINCL_OUT_REG, ADIS16201_SCAN_INCLI_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), ADIS_INCLI_CHAN(Y, ADIS16201_YINCL_OUT_REG, ADIS16201_SCAN_INCLI_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), IIO_CHAN_SOFT_TIMESTAMP(7) }; From da33c4167b9cc1266a97215114cb74679f881d0c Mon Sep 17 00:00:00 2001 From: Silvano Seva Date: Tue, 11 Mar 2025 09:49:47 +0100 Subject: [PATCH 045/114] iio: imu: st_lsm6dsx: fix possible lockup in st_lsm6dsx_read_fifo commit 159ca7f18129834b6f4c7eae67de48e96c752fc9 upstream. Prevent st_lsm6dsx_read_fifo from falling in an infinite loop in case pattern_len is equal to zero and the device FIFO is not empty. Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver") Signed-off-by: Silvano Seva Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250311085030.3593-2-s.seva@4sigma.it Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 066fe561c5e8..56b14afed769 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -370,6 +370,9 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw) if (fifo_status & cpu_to_le16(ST_LSM6DSX_FIFO_EMPTY_MASK)) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_SAMPLE_SIZE; + fifo_len = (le16_to_cpu(fifo_status) & fifo_diff_mask) * ST_LSM6DSX_CHAN_SIZE; fifo_len = (fifo_len / pattern_len) * pattern_len; From 9ce662851380fe2018e36e15c0bdcb1ad177ed95 Mon Sep 17 00:00:00 2001 From: Silvano Seva Date: Tue, 11 Mar 2025 09:49:49 +0100 Subject: [PATCH 046/114] iio: imu: st_lsm6dsx: fix possible lockup in st_lsm6dsx_read_tagged_fifo commit 8114ef86e2058e2554111b793596f17bee23fa15 upstream. Prevent st_lsm6dsx_read_tagged_fifo from falling in an infinite loop in case pattern_len is equal to zero and the device FIFO is not empty. Fixes: 801a6e0af0c6 ("iio: imu: st_lsm6dsx: add support to LSM6DSO") Signed-off-by: Silvano Seva Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250311085030.3593-4-s.seva@4sigma.it Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 56b14afed769..b8119fa4768e 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -604,6 +604,9 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw) if (!fifo_len) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_TAGGED_SAMPLE_SIZE; + for (read_len = 0; read_len < fifo_len; read_len += pattern_len) { err = st_lsm6dsx_read_block(hw, ST_LSM6DSX_REG_FIFO_OUT_TAG_ADDR, From 12125f7d9c15e6d8ac91d10373b2db2f17dcf767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 30 Apr 2025 17:51:52 -0300 Subject: [PATCH 047/114] drm/v3d: Add job to pending list if the reset was skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 35e4079bf1a2570abffce6ababa631afcf8ea0e5 upstream. When a CL/CSD job times out, we check if the GPU has made any progress since the last timeout. If so, instead of resetting the hardware, we skip the reset and let the timer get rearmed. This gives long-running jobs a chance to complete. However, when `timedout_job()` is called, the job in question is removed from the pending list, which means it won't be automatically freed through `free_job()`. Consequently, when we skip the reset and keep the job running, the job won't be freed when it finally completes. This situation leads to a memory leak, as exposed in [1] and [2]. Similarly to commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active"), this patch ensures the job is put back on the pending list when extending the timeout. Cc: stable@vger.kernel.org # 6.0 Reported-by: Daivik Bhatia Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12227 [1] Closes: https://github.com/raspberrypi/linux/issues/6817 [2] Reviewed-by: Iago Toral Quiroga Acked-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250430210643.57924-1-mcanal@igalia.com Signed-off-by: Maíra Canal Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/v3d/v3d_sched.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 5b729013fd26..41493cf3d03b 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -289,11 +289,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. - */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -303,9 +308,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -345,11 +357,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. */ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } From c8a91debb020298c74bba0b9b6ed720fa98dc4a9 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 16 Apr 2025 11:26:54 -0400 Subject: [PATCH 048/114] drm/amd/display: more liberal vmin/vmax update for freesync commit f1c6be3999d2be2673a51a9be0caf9348e254e52 upstream. [Why] FAMS2 expects vmin/vmax to be updated in the case when freesync is off, but supported. But we only update it when freesync is enabled. [How] Change the vsync handler such that dc_stream_adjust_vmin_vmax() its called irrespective of whether freesync is enabled. If freesync is supported, then there is no harm in updating vmin/vmax registers. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3546 Reviewed-by: ChiaHsuan Chung Signed-off-by: Aurabindo Pillai Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit cfb2d41831ee5647a4ae0ea7c24971a92d5dfa0d) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4c731b6c020c..dd643563a5e8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -610,15 +610,21 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + dc_stream_adjust_vmin_vmax(adev->dm.dc, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* From cee6856fbf1c6ef641215a14324f4ac035b4d909 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:29:07 +0800 Subject: [PATCH 049/114] drm/amd/display: Fix the checking condition in dmub aux handling commit bc70e11b550d37fbd9eaed0f113ba560894f1609 upstream. [Why & How] Fix the checking condition for detecting AUX_RET_ERROR_PROTOCOL_ERROR. It was wrongly checking by "not equals to" Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1db6c9e9b62e1a8912f0a281c941099fca678da3) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index dd643563a5e8..a0bd970c09b4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11055,7 +11055,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); From 89850f11bb7cb97bfa2aabe99b8f4c7e5ef88f68 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:56:54 +0800 Subject: [PATCH 050/114] drm/amd/display: Remove incorrect checking in dmub aux handler commit 396dc51b3b7ea524bf8061f478332d0039e96d5d upstream. [Why & How] "Request length != reply length" is expected behavior defined in spec. It's not an invalid reply. Besides, replied data handling logic is not designed to be written in amdgpu_dm_process_dmub_aux_transfer_sync(). Remove the incorrectly handling section. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 81b5c6fa62af62fe89ae9576f41aae37830b94cb) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a0bd970c09b4..b123b37941dd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11070,19 +11070,9 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; From 79d982ae2f61ad65955dce1bf5b9a7a4f0561f58 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 19:22:14 +0800 Subject: [PATCH 051/114] drm/amd/display: Fix wrong handling for AUX_DEFER case commit 65924ec69b29296845c7f628112353438e63ea56 upstream. [Why] We incorrectly ack all bytes get written when the reply actually is defer. When it's defer, means sink is not ready for the request. We should retry the request. [How] Only reply all data get written when receive I2C_ACK|AUX_ACK. Otherwise, reply the number of actual written bytes received from the sink. Add some messages to facilitate debugging as well. Fixes: ad6756b4d773 ("drm/amd/display: Shift dc link aux to aux_payload") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 3637e457eb0000bc37d8bbbec95964aad2fb29fd) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5858e288b3fd..c0cacd501c83 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -48,6 +48,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -84,15 +87,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes. Force 0 to retry*/ + drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = 0; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -111,6 +124,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } From f647ce6d9b5d410a2a468d67c093d7f6e5a03b45 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 17:50:03 +0800 Subject: [PATCH 052/114] drm/amd/display: Copy AUX read reply data whenever length > 0 commit 3924f45d4de7250a603fd7b50379237a6a0e5adf upstream. [Why] amdgpu_dm_process_dmub_aux_transfer_sync() should return all exact data reply from the sink side. Don't do the analysis job in it. [How] Remove unnecessary check condition AUX_TRANSACTION_REPLY_AUX_ACK. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9b540e3fe6796fec4fb1344f3be8952fc2f084d4) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b123b37941dd..f6017be8f995 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11069,8 +11069,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) + if (!payload->write && p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); From 05340a423c92d36b781f81b4d8c8760a4a487823 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:45:04 -0400 Subject: [PATCH 053/114] drm/amdgpu/hdp4: use memcfg register to post the write for HDP flush commit f690e3974755a650259a45d71456decc9c96a282 upstream. Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: c9b8dcabb52a ("drm/amdgpu/hdp4.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 5c937b4a6050316af37ef214825b6340b5e9e391) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 30210613dc5c..2f3054ed7b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -42,7 +42,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 7133ff3bedf33043d83280244922d77944502d11 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:47:37 -0400 Subject: [PATCH 054/114] drm/amdgpu/hdp5.2: use memcfg register to post the write for HDP flush commit dbc988c689333faeeed44d5561f372ff20395304 upstream. Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: f756dbac1ce1 ("drm/amdgpu/hdp5.2: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 4a89b7698e771914b4d5b571600c76e2fdcbe2a9) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index f52552c5fa27..6b9f2e1d9d69 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -34,7 +34,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, From 1824e914e291a2e638828896acf8bc008b1018a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:46:56 -0400 Subject: [PATCH 055/114] drm/amdgpu/hdp5: use memcfg register to post the write for HDP flush commit 0e33e0f339b91eecd9558311449a3d1e728722d4 upstream. Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: cf424020e040 ("drm/amdgpu/hdp5.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit a5cb344033c7598762e89255e8ff52827abb57a4) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index d3962d469088..40705e13ca56 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -33,7 +33,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 03f108a718a7dfaeb9f0501fc0f5e34379172cba Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:48:51 -0400 Subject: [PATCH 056/114] drm/amdgpu/hdp6: use memcfg register to post the write for HDP flush commit ca28e80abe4219c8f1a2961ae05102d70af6dc87 upstream. Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: abe1cbaec6cf ("drm/amdgpu/hdp6.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 84141ff615951359c9a99696fd79a36c465ed847) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index b6d71ec1debf..0d0c568f3839 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -33,7 +33,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 671c05434d9178fdb244bb17ba49add793d4d9c4 Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Fri, 25 Apr 2025 18:11:11 +0400 Subject: [PATCH 057/114] usb: uhci-platform: Make the clock really optional commit a5c7973539b010874a37a0e846e62ac6f00553ba upstream. Device tree bindings state that the clock is optional for UHCI platform controllers, and some existing device trees don't provide those - such as those for VIA/WonderMedia devices. The driver however fails to probe now if no clock is provided, because devm_clk_get returns an error pointer in such case. Switch to devm_clk_get_optional instead, so that it could probe again on those platforms where no clocks are given. Cc: stable Fixes: 26c502701c52 ("usb: uhci: Add clk support to uhci-platform") Signed-off-by: Alexey Charkov Link: https://lore.kernel.org/r/20250425-uhci-clock-optional-v1-1-a1d462592f29@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c index 3dec5dd3a0d5..712389599d46 100644 --- a/drivers/usb/host/uhci-platform.c +++ b/drivers/usb/host/uhci-platform.c @@ -121,7 +121,7 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev) } /* Get and enable clock if any specified */ - uhci->clk = devm_clk_get(&pdev->dev, NULL); + uhci->clk = devm_clk_get_optional(&pdev->dev, NULL); if (IS_ERR(uhci->clk)) { ret = PTR_ERR(uhci->clk); goto err_rmr; From 2ed98e89ebc2e1bc73534dc3c18cb7843a889ff9 Mon Sep 17 00:00:00 2001 From: Paul Aurich Date: Tue, 6 May 2025 22:28:09 -0700 Subject: [PATCH 058/114] smb: client: Avoid race in open_cached_dir with lease breaks commit 3ca02e63edccb78ef3659bebc68579c7224a6ca2 upstream. A pre-existing valid cfid returned from find_or_create_cached_dir might race with a lease break, meaning open_cached_dir doesn't consider it valid, and thinks it's newly-constructed. This leaks a dentry reference if the allocation occurs before the queued lease break work runs. Avoid the race by extending holding the cfid_list_lock across find_or_create_cached_dir and when the result is checked. Cc: stable@vger.kernel.org Reviewed-by: Henrique Carvalho Signed-off-by: Paul Aurich Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 9c0ef4195b58..749794667295 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -29,7 +29,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, { struct cached_fid *cfid; - spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { if (!strcmp(cfid->path, path)) { /* @@ -38,25 +37,20 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * being deleted due to a lease break. */ if (!cfid->time || !cfid->has_lease) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } kref_get(&cfid->refcount); - spin_unlock(&cfids->cfid_list_lock); return cfid; } } if (lookup_only) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } if (cfids->num_entries >= max_cached_dirs) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid = init_cached_dir(path); if (cfid == NULL) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid->cfids = cfids; @@ -74,7 +68,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, */ cfid->has_lease = true; - spin_unlock(&cfids->cfid_list_lock); return cfid; } @@ -185,8 +178,10 @@ replay_again: if (!utf16_path) return -ENOMEM; + spin_lock(&cfids->cfid_list_lock); cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs); if (cfid == NULL) { + spin_unlock(&cfids->cfid_list_lock); kfree(utf16_path); return -ENOENT; } @@ -195,7 +190,6 @@ replay_again: * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. */ - spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease && cfid->time) { spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; From ffa14d4dc7797f029c3a9780f504073f1a0e19ba Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Fri, 2 May 2025 11:40:55 +0000 Subject: [PATCH 059/114] xen: swiotlb: Use swiotlb bouncing if kmalloc allocation demands it commit cd9c058489053e172a6654cad82ee936d1b09fab upstream. Xen swiotlb support was missed when the patch set starting with 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") was merged. When running Xen on iMX8QXP, a SoC without IOMMU, the effect was that USB transfers ended up corrupted when there was more than one URB inflight at the same time. Add a call to dma_kmalloc_needs_bounce() to make sure that allocations too small for DMA get bounced via swiotlb. Closes: https://lore.kernel.org/linux-usb/ab2776f0-b838-4cf6-a12a-c208eb6aad59@actia.se/ Fixes: 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") Cc: stable@kernel.org # v6.5+ Signed-off-by: John Ernberg Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Message-ID: <20250502114043.1968976-2-john.ernberg@actia.se> Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0b3bd9a7575e..5770f3b374ec 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -216,6 +216,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (dma_capable(dev, dev_addr, size, true) && + !dma_kmalloc_needs_bounce(dev, size, dir) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && !is_swiotlb_force_bounce(dev)) From cbfaf46b88a4c01b64c4186cdccd766c19ae644c Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Tue, 6 May 2025 17:09:33 -0400 Subject: [PATCH 060/114] xenbus: Use kref to track req lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f0304dfd9d217c2f8b04a9ef4b3258a66eedd27 upstream. Marek reported seeing a NULL pointer fault in the xenbus_thread callstack: BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: e030:__wake_up_common+0x4c/0x180 Call Trace: __wake_up_common_lock+0x82/0xd0 process_msg+0x18e/0x2f0 xenbus_thread+0x165/0x1c0 process_msg+0x18e is req->cb(req). req->cb is set to xs_wake_up(), a thin wrapper around wake_up(), or xenbus_dev_queue_reply(). It seems like it was xs_wake_up() in this case. It seems like req may have woken up the xs_wait_for_reply(), which kfree()ed the req. When xenbus_thread resumes, it faults on the zero-ed data. Linux Device Drivers 2nd edition states: "Normally, a wake_up call can cause an immediate reschedule to happen, meaning that other processes might run before wake_up returns." ... which would match the behaviour observed. Change to keeping two krefs on each request. One for the caller, and one for xenbus_thread. Each will kref_put() when finished, and the last will free it. This use of kref matches the description in Documentation/core-api/kref.rst Link: https://lore.kernel.org/xen-devel/ZO0WrR5J0xuwDIxW@mail-itl/ Reported-by: Marek Marczykowski-Górecki Fixes: fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250506210935.5607-1-jason.andryuk@amd.com> Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xenbus/xenbus.h | 2 ++ drivers/xen/xenbus/xenbus_comms.c | 9 ++++----- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- drivers/xen/xenbus/xenbus_xs.c | 18 ++++++++++++++++-- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 2754bdfadcb8..4ba73320694a 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -77,6 +77,7 @@ enum xb_req_state { struct xb_req_data { struct list_head list; wait_queue_head_t wq; + struct kref kref; struct xsd_sockmsg msg; uint32_t caller_req_id; enum xsd_sockmsg_type type; @@ -103,6 +104,7 @@ int xb_init_comms(void); void xb_deinit_comms(void); int xs_watch_msg(struct xs_watch_event *event); void xs_request_exit(struct xb_req_data *req); +void xs_free_req(struct kref *kref); int xenbus_match(struct device *_dev, struct device_driver *_drv); int xenbus_dev_probe(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index e5fda0256feb..82df2da1b880 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -309,8 +309,8 @@ static int process_msg(void) virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); - } else - kfree(req); + } + kref_put(&req->kref, xs_free_req); } mutex_unlock(&xs_response_mutex); @@ -386,14 +386,13 @@ static int process_writes(void) state.req->msg.type = XS_ERROR; state.req->err = err; list_del(&state.req->list); - if (state.req->state == xb_req_state_aborted) - kfree(state.req); - else { + if (state.req->state != xb_req_state_aborted) { /* write err, then update state */ virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } + kref_put(&state.req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 0792fda49a15..c495cff3da30 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -406,7 +406,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req) mutex_unlock(&u->reply_mutex); kfree(req->body); - kfree(req); + kref_put(&req->kref, xs_free_req); kref_put(&u->kref, xenbus_file_free); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 028a182bcc9e..f84c87ee2839 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -112,6 +112,12 @@ static void xs_suspend_exit(void) wake_up_all(&xs_state_enter_wq); } +void xs_free_req(struct kref *kref) +{ + struct xb_req_data *req = container_of(kref, struct xb_req_data, kref); + kfree(req); +} + static uint32_t xs_request_enter(struct xb_req_data *req) { uint32_t rq_id; @@ -237,6 +243,12 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) req->caller_req_id = req->msg.req_id; req->msg.req_id = xs_request_enter(req); + /* + * Take 2nd ref. One for this thread, and the second for the + * xenbus_thread. + */ + kref_get(&req->kref); + mutex_lock(&xb_write_mutex); list_add_tail(&req->list, &xb_write_list); notify = list_is_singular(&xb_write_list); @@ -261,8 +273,8 @@ static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg) if (req->state == xb_req_state_queued || req->state == xb_req_state_wait_reply) req->state = xb_req_state_aborted; - else - kfree(req); + + kref_put(&req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); return ret; @@ -291,6 +303,7 @@ int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) req->cb = xenbus_dev_queue_reply; req->par = par; req->user_req = true; + kref_init(&req->kref); xs_send(req, msg); @@ -319,6 +332,7 @@ static void *xs_talkv(struct xenbus_transaction t, req->num_vecs = num_vecs; req->cb = xs_wake_up; req->user_req = false; + kref_init(&req->kref); msg.req_id = 0; msg.tx_id = t.id; From 7e1c7748404baa6f80f4baf705367ac104a40d70 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Apr 2025 15:31:16 +0200 Subject: [PATCH 061/114] clocksource/i8253: Use raw_spinlock_irqsave() in clockevent_i8253_disable() commit 94cff94634e506a4a44684bee1875d2dbf782722 upstream. On x86 during boot, clockevent_i8253_disable() can be invoked via x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens with enabled interrupts. If some of the old i8253 hardware is actually used then lockdep will notice that i8253_lock is used in hard interrupt context. This causes lockdep to complain because it observed the lock being acquired with interrupts enabled and in hard interrupt context. Make clockevent_i8253_disable() acquire the lock with raw_spinlock_irqsave() to cure this. [ tglx: Massage change log and use guard() ] Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/i8253.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 39f7c2d736d1..b603c25f3dfa 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void) #ifdef CONFIG_CLKEVT_I8253 void clockevent_i8253_disable(void) { - raw_spin_lock(&i8253_lock); + guard(raw_spinlock_irqsave)(&i8253_lock); /* * Writing the MODE register should stop the counter, according to @@ -132,8 +132,6 @@ void clockevent_i8253_disable(void) outb_p(0, PIT_CH0); outb_p(0x30, PIT_MODE); - - raw_spin_unlock(&i8253_lock); } static int pit_shutdown(struct clock_event_device *evt) From faa9059631d3491d699c69ecf512de9e1a3d6649 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 7 May 2025 09:50:44 +0300 Subject: [PATCH 062/114] module: ensure that kobject_put() is safe for module type kobjects commit a6aeb739974ec73e5217c75a7c008a688d3d5cf1 upstream. In 'lookup_or_create_module_kobject()', an internal kobject is created using 'module_ktype'. So call to 'kobject_put()' on error handling path causes an attempt to use an uninitialized completion pointer in 'module_kobject_release()'. In this scenario, we just want to release kobject without an extra synchronization required for a regular module unloading process, so adding an extra check whether 'complete()' is actually required makes 'kobject_put()' safe. Reported-by: syzbot+7fb8a372e1f6add936dd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7fb8a372e1f6add936dd Fixes: 942e443127e9 ("module: Fix mod->mkobj.kobj potentially freed too early") Cc: stable@vger.kernel.org Suggested-by: Petr Pavlu Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20250507065044.86529-1-dmantipov@yandex.ru Signed-off-by: Petr Pavlu Signed-off-by: Greg Kroah-Hartman --- kernel/params.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index c7aed3c51cd5..e39ac5420cd6 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -945,7 +945,9 @@ struct kset *module_kset; static void module_kobj_release(struct kobject *kobj) { struct module_kobject *mk = to_module_kobject(kobj); - complete(mk->kobj_completion); + + if (mk->kobj_completion) + complete(mk->kobj_completion); } const struct kobj_type module_ktype = { From cbd5108119d7c2877737fc76480cfc50ba7088b6 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 14 Apr 2025 11:59:33 +0200 Subject: [PATCH 063/114] x86/microcode: Consolidate the loader enablement checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5214a9f6c0f56644acb9d2cbb58facf1856d322b upstream. Consolidate the whole logic which determines whether the microcode loader should be enabled or not into a single function and call it everywhere. Well, almost everywhere - not in mk_early_pgtbl_32() because there the kernel is running without paging enabled and checking dis_ucode_ldr et al would require physical addresses and uglification of the code. But since this is 32-bit, the easier thing to do is to simply map the initrd unconditionally especially since that mapping is getting removed later anyway by zap_early_initrd_mapping() and avoid the uglification. In doing so, address the issue of old 486er machines without CPUID support, not booting current kernels. [ mingo: Fix no previous prototype for ‘microcode_loader_disabled’ [-Wmissing-prototypes] ] Fixes: 4c585af7180c1 ("x86/boot/32: Temporarily map initrd for microcode loading") Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/CANpbe9Wm3z8fy9HbgS8cuhoj0TREYEEkBipDuhgkWFvqX0UoVQ@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/microcode.h | 2 + arch/x86/kernel/cpu/microcode/amd.c | 6 ++- arch/x86/kernel/cpu/microcode/core.c | 60 ++++++++++++++---------- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/microcode/internal.h | 1 - arch/x86/kernel/head32.c | 4 -- 6 files changed, 42 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 0ee6ed0ff2bf..79c35947e20c 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -17,10 +17,12 @@ struct ucode_cpu_info { void load_ucode_bsp(void); void load_ucode_ap(void); void microcode_bsp_resume(void); +bool __init microcode_loader_disabled(void); #else static inline void load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void microcode_bsp_resume(void) { } +static inline bool __init microcode_loader_disabled(void) { return false; } #endif extern unsigned long initrd_start_early; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index ce78e39004e0..9b0570f769eb 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1102,15 +1102,17 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz static int __init save_microcode_in_initrd(void) { - unsigned int cpuid_1_eax = native_cpuid_eax(1); struct cpuinfo_x86 *c = &boot_cpu_data; struct cont_desc desc = { 0 }; + unsigned int cpuid_1_eax; enum ucode_state ret; struct cpio_data cp; - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + if (microcode_loader_disabled() || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) return 0; + cpuid_1_eax = native_cpuid_eax(1); + if (!find_blobs_in_containers(&cp)) return -EINVAL; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index c15c7b862bec..5b47c320f17a 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -43,8 +43,8 @@ #define DRIVER_VERSION "2.2" -static struct microcode_ops *microcode_ops; -bool dis_ucode_ldr = true; +static struct microcode_ops *microcode_ops; +static bool dis_ucode_ldr = false; bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); module_param(force_minrev, bool, S_IRUSR | S_IWUSR); @@ -91,6 +91,9 @@ static bool amd_check_current_patch_level(void) u32 lvl, dummy, i; u32 *levels; + if (x86_cpuid_vendor() != X86_VENDOR_AMD) + return false; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); levels = final_levels; @@ -102,27 +105,29 @@ static bool amd_check_current_patch_level(void) return false; } -static bool __init check_loader_disabled_bsp(void) +bool __init microcode_loader_disabled(void) { - static const char *__dis_opt_str = "dis_ucode_ldr"; - const char *cmdline = boot_command_line; - const char *option = __dis_opt_str; - - /* - * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not - * completely accurate as xen pv guests don't see that CPUID bit set but - * that's good enough as they don't land on the BSP path anyway. - */ - if (native_cpuid_ecx(1) & BIT(31)) + if (dis_ucode_ldr) return true; - if (x86_cpuid_vendor() == X86_VENDOR_AMD) { - if (amd_check_current_patch_level()) - return true; - } - - if (cmdline_find_option_bool(cmdline, option) <= 0) - dis_ucode_ldr = false; + /* + * Disable when: + * + * 1) The CPU does not support CPUID. + * + * 2) Bit 31 in CPUID[1]:ECX is clear + * The bit is reserved for hypervisor use. This is still not + * completely accurate as XEN PV guests don't see that CPUID bit + * set, but that's good enough as they don't land on the BSP + * path anyway. + * + * 3) Certain AMD patch levels are not allowed to be + * overwritten. + */ + if (!have_cpuid_p() || + native_cpuid_ecx(1) & BIT(31) || + amd_check_current_patch_level()) + dis_ucode_ldr = true; return dis_ucode_ldr; } @@ -132,7 +137,10 @@ void __init load_ucode_bsp(void) unsigned int cpuid_1_eax; bool intel = true; - if (!have_cpuid_p()) + if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0) + dis_ucode_ldr = true; + + if (microcode_loader_disabled()) return; cpuid_1_eax = native_cpuid_eax(1); @@ -153,9 +161,6 @@ void __init load_ucode_bsp(void) return; } - if (check_loader_disabled_bsp()) - return; - if (intel) load_ucode_intel_bsp(&early_data); else @@ -166,6 +171,11 @@ void load_ucode_ap(void) { unsigned int cpuid_1_eax; + /* + * Can't use microcode_loader_disabled() here - .init section + * hell. It doesn't have to either - the BSP variant must've + * parsed cmdline already anyway. + */ if (dis_ucode_ldr) return; @@ -817,7 +827,7 @@ static int __init microcode_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; int error; - if (dis_ucode_ldr) + if (microcode_loader_disabled()) return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 9d7baf2573bc..4e5a71796fbd 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -389,7 +389,7 @@ static int __init save_builtin_microcode(void) if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED) return 0; - if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (microcode_loader_disabled() || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; uci.mc = get_microcode_blob(&uci, true); diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 21776c529fa9..54f2b9582a7c 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -94,7 +94,6 @@ static inline unsigned int x86_cpuid_family(void) return x86_family(eax); } -extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index de001b2146ab..375f2d7f1762 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -145,10 +145,6 @@ void __init __no_stack_protector mk_early_pgtbl_32(void) *ptr = (unsigned long)ptep + PAGE_OFFSET; #ifdef CONFIG_MICROCODE_INITRD32 - /* Running on a hypervisor? */ - if (native_cpuid_ecx(1) & BIT(31)) - return; - params = (struct boot_params *)__pa_nodebug(&boot_params); if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image) return; From c7441aa8d0781967d7194a0929742056821336b7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:11 +0200 Subject: [PATCH 064/114] ocfs2: switch osb->disable_recovery to enum commit c0fb83088f0cc4ee4706e0495ee8b06f49daa716 upstream. Patch series "ocfs2: Fix deadlocks in quota recovery", v3. This implements another approach to fixing quota recovery deadlocks. We avoid grabbing sb->s_umount semaphore from ocfs2_finish_quota_recovery() and instead stop quota recovery early in ocfs2_dismount_volume(). This patch (of 3): We will need more recovery states than just pure enable / disable to fix deadlocks with quota recovery. Switch osb->disable_recovery to enum. Link: https://lkml.kernel.org/r/20250424134301.1392-1-jack@suse.cz Link: https://lkml.kernel.org/r/20250424134515.18933-4-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Murad Masimov Cc: Shichangkuo Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/journal.c | 14 ++++++++------ fs/ocfs2/ocfs2.h | 7 ++++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index cbe3c12ff5f7..ca8571a8a54b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb) /* disable any new recovery threads and wait for any currently * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + osb->recovery_state = OCFS2_REC_DISABLED; mutex_unlock(&osb->recovery_lock); wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); @@ -1582,14 +1582,16 @@ bail: void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_DISABLED) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? - -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state == OCFS2_REC_DISABLED) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8fe826143d7b..acfe18d005b3 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +375,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; From cc335d4f4e4f2042c80c2b61a42f093c62f92d26 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:12 +0200 Subject: [PATCH 065/114] ocfs2: implement handshaking with ocfs2 recovery thread commit 8f947e0fd595951460f5a6e1ac29baa82fa02eab upstream. We will need ocfs2 recovery thread to acknowledge transitions of recovery_state when disabling particular types of recovery. This is similar to what currently happens when disabling recovery completely, just more general. Implement the handshake and use it for exit from recovery. Link: https://lkml.kernel.org/r/20250424134515.18933-5-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Murad Masimov Cc: Shichangkuo Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++--------------- fs/ocfs2/ocfs2.h | 4 ++++ 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index ca8571a8a54b..abaa3143c5c6 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) +{ + mutex_lock(&osb->recovery_lock); + /* + * If recovery thread is not running, we can directly transition to + * final state. + */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: + mutex_unlock(&osb->recovery_lock); + + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); +} + void ocfs2_recovery_exit(struct ocfs2_super *osb) { struct ocfs2_recovery_map *rm; /* disable any new recovery threads and wait for any currently * running ones to exit. Do this before setting the vol_state. */ - mutex_lock(&osb->recovery_lock); - osb->recovery_state = OCFS2_REC_DISABLED; - mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ - if (osb->ocfs2_wq) - flush_workqueue(osb->ocfs2_wq); + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1569,7 +1586,8 @@ bail: ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); @@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) int was_set = -1; mutex_lock(&osb->recovery_lock); - if (osb->recovery_state < OCFS2_REC_DISABLED) + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->recovery_state == OCFS2_REC_DISABLED) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index acfe18d005b3..9764a2904a7d 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb, enum ocfs2_recovery_state { OCFS2_REC_ENABLED = 0, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ OCFS2_REC_DISABLED, }; From 8c133a08524a649135aaea0182bacfc4c6abbe32 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:13 +0200 Subject: [PATCH 066/114] ocfs2: stop quota recovery before disabling quotas commit fcaf3b2683b05a9684acdebda706a12025a6927a upstream. Currently quota recovery is synchronized with unmount using sb->s_umount semaphore. That is however prone to deadlocks because flush_workqueue(osb->ocfs2_wq) called from umount code can wait for quota recovery to complete while ocfs2_finish_quota_recovery() waits for sb->s_umount semaphore. Grabbing of sb->s_umount semaphore in ocfs2_finish_quota_recovery() is only needed to protect that function from disabling of quotas from ocfs2_dismount_volume(). Handle this problem by disabling quota recovery early during unmount in ocfs2_dismount_volume() instead so that we can drop acquisition of sb->s_umount from ocfs2_finish_quota_recovery(). Link: https://lkml.kernel.org/r/20250424134515.18933-6-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reported-by: Shichangkuo Reported-by: Murad Masimov Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/journal.c | 20 ++++++++++++++++++-- fs/ocfs2/journal.h | 1 + fs/ocfs2/ocfs2.h | 6 ++++++ fs/ocfs2/quota_local.c | 9 ++------- fs/ocfs2/super.c | 3 +++ 5 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index abaa3143c5c6..b246e5327111 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -225,6 +225,11 @@ out_lock: flush_workqueue(osb->ocfs2_wq); } +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + void ocfs2_recovery_exit(struct ocfs2_super *osb) { struct ocfs2_recovery_map *rm; @@ -1489,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1592,8 +1609,7 @@ bail: mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0..6397170f302f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9764a2904a7d..5e3ebbab698a 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -310,6 +310,12 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb, enum ocfs2_recovery_state { OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. + */ + OCFS2_REC_QUOTA_DISABLED, OCFS2_REC_WANT_DISABLE, /* * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 4b4fa58cd32f..0ca8975a1df4 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -585,7 +584,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -597,7 +595,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -674,7 +671,6 @@ out_put: break; } out: - up_read(&sb->s_umount); kfree(rec); return status; } @@ -840,8 +836,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 84fa585c6513..e585e77cdc88 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1870,6 +1870,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ From d84603122591c1c0ff1777da8b8156b50b8b287d Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 18 Apr 2025 04:55:16 +0000 Subject: [PATCH 067/114] usb: cdnsp: Fix issue with resuming from L1 commit 241e2ce88e5a494be7a5d44c0697592f1632fbee upstream. In very rare cases after resuming controller from L1 to L0 it reads registers before the clock UTMI have been enabled and as the result driver reads incorrect value. Most of registers are in APB domain clock but some of them (e.g. PORTSC) are in UTMI domain clock. After entering to L1 state the UTMI clock can be disabled. When controller transition from L1 to L0 the port status change event is reported and in interrupt runtime function driver reads PORTSC. During this read operation controller synchronize UTMI and APB domain but UTMI clock is still disabled and in result it reads 0xFFFFFFFF value. To fix this issue driver increases APB timeout value. The issue is platform specific and if the default value of APB timeout is not sufficient then this time should be set Individually for each platform. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB953846C57973E4DB134CAA71DDBF2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 29 +++++++++++++++++++++++++++++ drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-pci.c | 12 ++++++++++-- drivers/usb/cdns3/core.h | 3 +++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 4b67749edb99..c21b770b79f6 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -138,6 +138,26 @@ static void cdnsp_clear_port_change_bit(struct cdnsp_device *pdev, (portsc & PORT_CHANGE_BITS), port_regs); } +static void cdnsp_set_apb_timeout_value(struct cdnsp_device *pdev) +{ + struct cdns *cdns = dev_get_drvdata(pdev->dev); + __le32 __iomem *reg; + void __iomem *base; + u32 offset = 0; + u32 val; + + if (!cdns->override_apb_timeout) + return; + + base = &pdev->cap_regs->hc_capbase; + offset = cdnsp_find_next_ext_cap(base, offset, D_XEC_PRE_REGS_CAP); + reg = base + offset + REG_CHICKEN_BITS_3_OFFSET; + + val = le32_to_cpu(readl(reg)); + val = CHICKEN_APB_TIMEOUT_SET(val, cdns->override_apb_timeout); + writel(cpu_to_le32(val), reg); +} + static void cdnsp_set_chicken_bits_2(struct cdnsp_device *pdev, u32 bit) { __le32 __iomem *reg; @@ -1801,6 +1821,15 @@ static int cdnsp_gen_setup(struct cdnsp_device *pdev) pdev->hci_version = HC_VERSION(pdev->hcc_params); pdev->hcc_params = readl(&pdev->cap_regs->hcc_params); + /* + * Override the APB timeout value to give the controller more time for + * enabling UTMI clock and synchronizing APB and UTMI clock domains. + * This fix is platform specific and is required to fixes issue with + * reading incorrect value from PORTSC register after resuming + * from L1 state. + */ + cdnsp_set_apb_timeout_value(pdev); + cdnsp_get_rev_cap(pdev); /* Make sure the Device Controller is halted. */ diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 9a5577a772af..44b4d784555e 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -520,6 +520,9 @@ struct cdnsp_rev_cap { #define REG_CHICKEN_BITS_2_OFFSET 0x48 #define CHICKEN_XDMA_2_TP_CACHE_DIS BIT(28) +#define REG_CHICKEN_BITS_3_OFFSET 0x4C +#define CHICKEN_APB_TIMEOUT_SET(p, val) (((p) & ~GENMASK(21, 0)) | (val)) + /* XBUF Extended Capability ID. */ #define XBUF_CAP_ID 0xCB #define XBUF_RX_TAG_MASK_0_OFFSET 0x1C diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index 0725668ffea4..159c2eae2660 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -33,6 +33,8 @@ #define CDNS_DRD_ID 0x0100 #define CDNS_DRD_IF (PCI_CLASS_SERIAL_USB << 8 | 0x80) +#define CHICKEN_APB_TIMEOUT_VALUE 0x1C20 + static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) { /* @@ -144,6 +146,14 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, cdnsp->otg_irq = pdev->irq; } + /* + * Cadence PCI based platform require some longer timeout for APB + * to fixes domain clock synchronization issue after resuming + * controller from L1 state. + */ + cdnsp->override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE; + pci_set_drvdata(pdev, cdnsp); + if (pci_is_enabled(func)) { cdnsp->dev = dev; cdnsp->gadget_init = cdnsp_gadget_init; @@ -153,8 +163,6 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, goto free_cdnsp; } - pci_set_drvdata(pdev, cdnsp); - device_wakeup_enable(&pdev->dev); if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 57d47348dc19..ac30ee21309d 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -79,6 +79,8 @@ struct cdns3_platform_data { * @pdata: platform data from glue layer * @lock: spinlock structure * @xhci_plat_data: xhci private data structure pointer + * @override_apb_timeout: hold value of APB timeout. For value 0 the default + * value in CHICKEN_BITS_3 will be preserved. * @gadget_init: pointer to gadget initialization function */ struct cdns { @@ -117,6 +119,7 @@ struct cdns { struct cdns3_platform_data *pdata; spinlock_t lock; struct xhci_plat_priv *xhci_plat_data; + u32 override_apb_timeout; int (*gadget_init)(struct cdns *cdns); }; From 1981926eb34aeda0a1378e937d6a9d30a8735d45 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 25 Apr 2025 05:55:40 +0000 Subject: [PATCH 068/114] usb: cdnsp: fix L1 resume issue for RTL_REVISION_NEW_LPM version commit 8614ecdb1570e4fffe87ebdc62b613ed66f1f6a6 upstream. The controllers with rtl version larger than RTL_REVISION_NEW_LPM (0x00002700) has bug which causes that controller doesn't resume from L1 state. It happens if after receiving LPM packet controller starts transitioning to L1 and in this moment the driver force resuming by write operation to PORTSC.PLS. It's corner case and happens when write operation to PORTSC occurs during device delay before transitioning to L1 after transmitting ACK time (TL1TokenRetry). Forcing transition from L1->L0 by driver for revision larger than RTL_REVISION_NEW_LPM is not needed, so driver can simply fix this issue through block call of cdnsp_force_l0_go function. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB9538B55C3A6E71F9ED29E980DD842@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 2 ++ drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-ring.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c21b770b79f6..601a60a28022 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1796,6 +1796,8 @@ static void cdnsp_get_rev_cap(struct cdnsp_device *pdev) reg += cdnsp_find_next_ext_cap(reg, 0, RTL_REV_CAP); pdev->rev_cap = reg; + pdev->rtl_revision = readl(&pdev->rev_cap->rtl_revision); + dev_info(pdev->dev, "Rev: %08x/%08x, eps: %08x, buff: %08x/%08x\n", readl(&pdev->rev_cap->ctrl_revision), readl(&pdev->rev_cap->rtl_revision), diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 44b4d784555e..ed84dbb9fd6f 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -1362,6 +1362,7 @@ struct cdnsp_port { * @rev_cap: Controller Capabilities Registers. * @hcs_params1: Cached register copies of read-only HCSPARAMS1 * @hcc_params: Cached register copies of read-only HCCPARAMS1 + * @rtl_revision: Cached controller rtl revision. * @setup: Temporary buffer for setup packet. * @ep0_preq: Internal allocated request used during enumeration. * @ep0_stage: ep0 stage during enumeration process. @@ -1416,6 +1417,8 @@ struct cdnsp_device { __u32 hcs_params1; __u32 hcs_params3; __u32 hcc_params; + #define RTL_REVISION_NEW_LPM 0x2700 + __u32 rtl_revision; /* Lock used in interrupt thread context. */ spinlock_t lock; struct usb_ctrlrequest setup; diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 1d18d5002ef0..080a3f17a35d 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -308,7 +308,8 @@ static bool cdnsp_ring_ep_doorbell(struct cdnsp_device *pdev, writel(db_value, reg_addr); - cdnsp_force_l0_go(pdev); + if (pdev->rtl_revision < RTL_REVISION_NEW_LPM) + cdnsp_force_l0_go(pdev); /* Doorbell was set. */ return true; From dffa51cf2d3f5bcbc5cf4471b9f4dfbf2ef3c810 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:29 +0530 Subject: [PATCH 069/114] usb: gadget: f_ecm: Add get_status callback commit 8e3820271c517ceb89ab7442656ba49fa23ee1d0 upstream. When host sends GET_STATUS to ECM interface, handle the request from the function driver. Since the interface is wakeup capable, set the corresponding bit, and set RW bit if the function is already armed for wakeup by the host. Cc: stable Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") Signed-off-by: Prashanth K Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-2-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ecm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index f55f60639e42..2afc30de54ce 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -892,6 +892,12 @@ static void ecm_resume(struct usb_function *f) gether_resume(&ecm->port); } +static int ecm_get_status(struct usb_function *f) +{ + return (f->func_wakeup_armed ? USB_INTRF_STAT_FUNC_RW : 0) | + USB_INTRF_STAT_FUNC_RW_CAP; +} + static void ecm_free(struct usb_function *f) { struct f_ecm *ecm; @@ -960,6 +966,7 @@ static struct usb_function *ecm_alloc(struct usb_function_instance *fi) ecm->port.func.disable = ecm_disable; ecm->port.func.free_func = ecm_free; ecm->port.func.suspend = ecm_suspend; + ecm->port.func.get_status = ecm_get_status; ecm->port.func.resume = ecm_resume; return &ecm->port.func; From 0b32d03e79c1f02b32cde41a9838a6e21d2e8e60 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Fri, 18 Apr 2025 16:12:28 +0800 Subject: [PATCH 070/114] usb: gadget: tegra-xudc: ACK ST_RC after clearing CTRL_RUN commit 59820fde001500c167342257650541280c622b73 upstream. We identified a bug where the ST_RC bit in the status register was not being acknowledged after clearing the CTRL_RUN bit in the control register. This could lead to unexpected behavior in the USB gadget drivers. This patch resolves the issue by adding the necessary code to explicitly acknowledge ST_RC after clearing CTRL_RUN based on the programming sequence, ensuring proper state transition. Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller") Cc: stable Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20250418081228.1194779-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 7aa46d426f31..9bb54da8a6ae 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -1749,6 +1749,10 @@ static int __tegra_xudc_ep_disable(struct tegra_xudc_ep *ep) val = xudc_readl(xudc, CTRL); val &= ~CTRL_RUN; xudc_writel(xudc, val, CTRL); + + val = xudc_readl(xudc, ST); + if (val & ST_RC) + xudc_writel(xudc, ST_RC, ST); } dev_info(xudc->dev, "ep %u disabled\n", ep->index); From 9b09b99a95106d85e3f03c264a427ea67e211fab Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:30 +0530 Subject: [PATCH 071/114] usb: gadget: Use get_status callback to set remote wakeup capability commit 5977a58dd5a4865198b0204b998adb0f634abe19 upstream. Currently when the host sends GET_STATUS request for an interface, we use get_status callbacks to set/clear remote wakeup capability of that interface. And if get_status callback isn't present for that interface, then we assume its remote wakeup capability based on bmAttributes. Now consider a scenario, where we have a USB configuration with multiple interfaces (say ECM + ADB), here ECM is remote wakeup capable and as of now ADB isn't. And bmAttributes will indicate the device as wakeup capable. With the current implementation, when host sends GET_STATUS request for both interfaces, we will set FUNC_RW_CAP for both. This results in USB3 CV Chapter 9.15 (Function Remote Wakeup Test) failures as host expects remote wakeup from both interfaces. The above scenario is just an example, and the failure can be observed if we use configuration with any interface except ECM. Hence avoid configuring remote wakeup capability from composite driver based on bmAttributes, instead use get_status callbacks and let the function drivers decide this. Cc: stable Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") Signed-off-by: Prashanth K Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-3-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 69ce7d384ba8..4f326988be86 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2011,15 +2011,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (f->get_status) { status = f->get_status(f); + if (status < 0) break; - } else { - /* Set D0 and D1 bits based on func wakeup capability */ - if (f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP) { - status |= USB_INTRF_STAT_FUNC_RW_CAP; - if (f->func_wakeup_armed) - status |= USB_INTRF_STAT_FUNC_RW; - } + + /* if D5 is not set, then device is not wakeup capable */ + if (!(f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP)) + status &= ~(USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW); } put_unaligned_le16(status & 0x0000ffff, req->buf); From 2d44ee69e6f51751b6caaa0eb52a21f3788c8072 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 22 Apr 2025 19:40:01 +0800 Subject: [PATCH 072/114] usb: host: tegra: Prevent host controller crash when OTG port is used commit 732f35cf8bdfece582f6e4a9c659119036577308 upstream. When a USB device is connected to the OTG port, the tegra_xhci_id_work() routine transitions the PHY to host mode and calls xhci_hub_control() with the SetPortFeature command to enable port power. In certain cases, the XHCI controller may be in a low-power state when this operation occurs. If xhci_hub_control() is invoked while the controller is suspended, the PORTSC register may return 0xFFFFFFFF, indicating a read failure. This causes xhci_hc_died() to be triggered, leading to host controller shutdown. Example backtrace: [ 105.445736] Workqueue: events tegra_xhci_id_work [ 105.445747] dump_backtrace+0x0/0x1e8 [ 105.445759] xhci_hc_died.part.48+0x40/0x270 [ 105.445769] tegra_xhci_set_port_power+0xc0/0x240 [ 105.445774] tegra_xhci_id_work+0x130/0x240 To prevent this, ensure the controller is fully resumed before interacting with hardware registers by calling pm_runtime_get_sync() prior to the host mode transition and xhci_hub_control(). Fixes: f836e7843036 ("usb: xhci-tegra: Add OTG support") Cc: stable Signed-off-by: Jim Lin Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20250422114001.126367-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 76f228e7443c..89b3079194d7 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1363,6 +1363,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl, tegra->otg_usb2_port); + pm_runtime_get_sync(tegra->dev); if (tegra->host_mode) { /* switch to host mode */ if (tegra->otg_usb3_port >= 0) { @@ -1392,6 +1393,7 @@ static void tegra_xhci_id_work(struct work_struct *work) } tegra_xhci_set_port_power(tegra, true, true); + pm_runtime_mark_last_busy(tegra->dev); } else { if (tegra->otg_usb3_port >= 0) @@ -1399,6 +1401,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra_xhci_set_port_power(tegra, true, false); } + pm_runtime_put_autosuspend(tegra->dev); } #if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_PM_SLEEP) From c9d8b0932e2bc2378086f7e16670fa50ca2fedde Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 29 Apr 2025 23:47:01 +0000 Subject: [PATCH 073/114] usb: typec: tcpm: delay SNK_TRY_WAIT_DEBOUNCE to SRC_TRYWAIT transition commit e918d3959b5ae0e793b8f815ce62240e10ba03a4 upstream. This patch fixes Type-C Compliance Test TD 4.7.6 - Try.SNK DRP Connect SNKAS. The compliance tester moves into SNK_UNATTACHED during toggling and expects the PUT to apply Rp after tPDDebounce of detection. If the port is in SNK_TRY_WAIT_DEBOUNCE, it will move into SRC_TRYWAIT immediately and apply Rp. This violates TD 4.7.5.V.3, where the tester confirms that the PUT attaches Rp after the transitions to Unattached.SNK for tPDDebounce. Change the tcpm_set_state delay between SNK_TRY_WAIT_DEBOUNCE and SRC_TRYWAIT to tPDDebounce. Fixes: a0a3e04e6b2c ("staging: typec: tcpm: Check for Rp for tPDDebounce") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250429234703.3748506-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 790aadab72a3..bfcbccb400c3 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5102,7 +5102,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, case SNK_TRY_WAIT_DEBOUNCE: if (!tcpm_port_is_sink(port)) { port->max_wait = 0; - tcpm_set_state(port, SRC_TRYWAIT, 0); + tcpm_set_state(port, SRC_TRYWAIT, PD_T_PD_DEBOUNCE); } break; case SRC_TRY_WAIT: From 14f298c52188c34acde9760bf5abc669c5c36fdb Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Thu, 24 Apr 2025 08:44:29 +0000 Subject: [PATCH 074/114] usb: typec: ucsi: displayport: Fix NULL pointer access commit 312d79669e71283d05c05cc49a1a31e59e3d9e0e upstream. This patch ensures that the UCSI driver waits for all pending tasks in the ucsi_displayport_work workqueue to finish executing before proceeding with the partner removal. Cc: stable Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Link: https://lore.kernel.org/r/20250424084429.3220757-3-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 2431febc4615..8c19081c3255 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -296,6 +296,8 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt) if (!dp) return; + cancel_work_sync(&dp->work); + dp->data.conf = 0; dp->data.status = 0; dp->initialized = false; From e2fef620e5e0b9d8e479ec0bff674e67ce1726d5 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 30 Apr 2025 15:48:10 +0200 Subject: [PATCH 075/114] USB: usbtmc: use interruptible sleep in usbtmc_read commit 054c5145540e5ad5b80adf23a5e3e2fc281fb8aa upstream. usbtmc_read() calls usbtmc_generic_read() which uses interruptible sleep, but usbtmc_read() itself uses uninterruptble sleep for mutual exclusion between threads. That makes no sense. Both should use interruptible sleep. Fixes: 5b775f672cc99 ("USB: add USB test and measurement class driver") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250430134810.226015-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index c2e666e82857..e483832b8d3a 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1380,7 +1380,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, if (!buffer) return -ENOMEM; - mutex_lock(&data->io_mutex); + retval = mutex_lock_interruptible(&data->io_mutex); + if (retval < 0) + goto exit_nolock; + if (data->zombie) { retval = -ENODEV; goto exit; @@ -1503,6 +1506,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, exit: mutex_unlock(&data->io_mutex); +exit_nolock: kfree(buffer); return retval; } From e96be8bd53ab15bb91f2201905eab4b33f8fbae4 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:39 +0200 Subject: [PATCH 076/114] usb: usbtmc: Fix erroneous get_stb ioctl error returns commit cac01bd178d6a2a23727f138d647ce1a0e8a73a1 upstream. wait_event_interruptible_timeout returns a long The return was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX resulting in random error returns. Use a long return value and convert to int ioctl return only on error. When the return value of wait_event_interruptible_timeout was <= INT_MAX the number of remaining jiffies was returned which has no meaning for the user. Return 0 on success. Reported-by: Michael Katzmann Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index e483832b8d3a..23d69b79357b 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -482,6 +482,7 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) u8 *buffer; u8 tag; int rv; + long wait_rv; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); @@ -511,16 +512,17 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) } if (data->iin_ep_present) { - rv = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, file_data->timeout); - if (rv < 0) { - dev_dbg(dev, "wait interrupted %d\n", rv); + if (wait_rv < 0) { + dev_dbg(dev, "wait interrupted %ld\n", wait_rv); + rv = wait_rv; goto exit; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; @@ -539,6 +541,8 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); + rv = 0; + exit: /* bump interrupt bTag */ data->iin_bTag += 1; From 1991ed796d9a3f5aca1108c6d054a2502b3b70f7 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:40 +0200 Subject: [PATCH 077/114] usb: usbtmc: Fix erroneous wait_srq ioctl return commit a9747c9b8b59ab4207effd20eb91a890acb44e16 upstream. wait_event_interruptible_timeout returns a long The return was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX resulting in random error returns. Use a long return value, converting to the int ioctl return only on error. Fixes: 739240a9f6ac ("usb: usbtmc: Add ioctl USBTMC488_IOCTL_WAIT_SRQ") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 23d69b79357b..d23de1bc7eed 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -606,9 +606,9 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; - int rv; u32 timeout; unsigned long expire; + long wait_rv; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); @@ -622,25 +622,24 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, mutex_unlock(&data->io_mutex); - rv = wait_event_interruptible_timeout( - data->waitq, - atomic_read(&file_data->srq_asserted) != 0 || - atomic_read(&file_data->closing), - expire); + wait_rv = wait_event_interruptible_timeout( + data->waitq, + atomic_read(&file_data->srq_asserted) != 0 || + atomic_read(&file_data->closing), + expire); mutex_lock(&data->io_mutex); /* Note! disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) - rv = -ENODEV; + return -ENODEV; - if (rv < 0) { - /* dev can be invalid now! */ - pr_debug("%s - wait interrupted %d\n", __func__, rv); - return rv; + if (wait_rv < 0) { + dev_dbg(dev, "%s - wait interrupted %ld\n", __func__, wait_rv); + return wait_rv; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } From 37a55b692d42e190b7c59d471044962e25a82694 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:41 +0200 Subject: [PATCH 078/114] usb: usbtmc: Fix erroneous generic_read ioctl return commit 4e77d3ec7c7c0d9535ccf1138827cb9bb5480b9b upstream. wait_event_interruptible_timeout returns a long The return value was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX which resulted in random error returns. Use a long return value, converting to the int ioctl return only on error. Fixes: bb99794a4792 ("usb: usbtmc: Add ioctl for vendor specific read") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-4-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index d23de1bc7eed..2f92905e05ca 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -833,6 +833,7 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, unsigned long expire; int bufcount = 1; int again = 0; + long wait_rv; /* mutex already locked */ @@ -945,19 +946,24 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); - retval = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); - dev_dbg(dev, "%s: wait returned %d\n", - __func__, retval); + dev_dbg(dev, "%s: wait returned %ld\n", + __func__, wait_rv); - if (retval <= 0) { - if (retval == 0) - retval = -ETIMEDOUT; + if (wait_rv < 0) { + retval = wait_rv; goto error; } + + if (wait_rv == 0) { + retval = -ETIMEDOUT; + goto error; + } + } urb = usb_get_from_anchor(&file_data->in_anchor); From 2b58e7c1e4b2bd7829443907d9f1009863c589a9 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Sun, 9 Mar 2025 19:35:15 +0000 Subject: [PATCH 079/114] iio: accel: adxl367: fix setting odr for activity time update [ Upstream commit 38f67d0264929762e54ae5948703a21f841fe706 ] Fix setting the odr value to update activity time based on frequency derrived by recent odr, and not by obsolete odr value. The [small] bug: When _adxl367_set_odr() is called with a new odr value, it first writes the new odr value to the hardware register ADXL367_REG_FILTER_CTL. Second, it calls _adxl367_set_act_time_ms(), which calls adxl367_time_ms_to_samples(). Here st->odr still holds the old odr value. This st->odr member is used to derrive a frequency value, which is applied to update ADXL367_REG_TIME_ACT. Hence, the idea is to update activity time, based on possibilities and power consumption by the current ODR rate. Finally, when the function calls return, again in _adxl367_set_odr() the new ODR is assigned to st->odr. The fix: When setting a new ODR value is set to ADXL367_REG_FILTER_CTL, also ADXL367_REG_TIME_ACT should probably be updated with a frequency based on the recent ODR value and not the old one. Changing the location of the assignment to st->odr fixes this. Fixes: cbab791c5e2a5 ("iio: accel: add ADXL367 driver") Signed-off-by: Lothar Rubusch Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250309193515.2974-1-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/adxl367.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 484fe2e9fb17..3a55475691de 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -620,18 +620,14 @@ static int _adxl367_set_odr(struct adxl367_state *st, enum adxl367_odr odr) if (ret) return ret; + st->odr = odr; + /* Activity timers depend on ODR */ ret = _adxl367_set_act_time_ms(st, st->act_time_ms); if (ret) return ret; - ret = _adxl367_set_inact_time_ms(st, st->inact_time_ms); - if (ret) - return ret; - - st->odr = odr; - - return 0; + return _adxl367_set_inact_time_ms(st, st->inact_time_ms); } static int adxl367_set_odr(struct iio_dev *indio_dev, enum adxl367_odr odr) From 85d430aef40ad5c426ec21a9e2f98ca6e401ea36 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:36 +0100 Subject: [PATCH 080/114] iio: temp: maxim-thermocouple: Fix potential lack of DMA safe buffer. [ Upstream commit f79aeb6c631b57395f37acbfbe59727e355a714c ] The trick of using __aligned(IIO_DMA_MINALIGN) ensures that there is no overlap between buffers used for DMA and those used for driver state storage that are before the marking. It doesn't ensure anything above state variables found after the marking. Hence move this particular bit of state earlier in the structure. Fixes: 10897f34309b ("iio: temp: maxim_thermocouple: Fix alignment for DMA safety") Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-14-jic23@kernel.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/temperature/maxim_thermocouple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index c28a7a6dea5f..555a61e2f3fd 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -121,9 +121,9 @@ static const struct maxim_thermocouple_chip maxim_thermocouple_chips[] = { struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; + char tc_type; u8 buffer[16] __aligned(IIO_DMA_MINALIGN); - char tc_type; }; static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, From 9519771908fc3562a9c70b90db20c90c969b700a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Sep 2024 20:59:04 +0300 Subject: [PATCH 081/114] types: Complement the aligned types with signed 64-bit one [ Upstream commit e4ca0e59c39442546866f3dd514a3a5956577daf ] Some user may want to use aligned signed 64-bit type. Provide it for them. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240903180218.3640501-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron Stable-dep-of: 1bb942287e05 ("iio: accel: adxl355: Make timestamp 64-bit aligned using aligned_s64") Signed-off-by: Sasha Levin --- include/linux/types.h | 3 ++- include/uapi/linux/types.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/types.h b/include/linux/types.h index 253168bb3fe1..78d87c751ff5 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -115,8 +115,9 @@ typedef u64 u_int64_t; typedef s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* These are the special 64-bit data types that are 8-byte aligned */ #define aligned_u64 __aligned_u64 +#define aligned_s64 __aligned_s64 #define aligned_be64 __aligned_be64 #define aligned_le64 __aligned_le64 diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 6375a0684052..48b933938877 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -53,6 +53,7 @@ typedef __u32 __bitwise __wsum; * No conversions are necessary between 32-bit user-space and a 64-bit kernel. */ #define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_s64 __s64 __attribute__((aligned(8))) #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) From 8b5273051b611118a55ce750eff21c4c6af4009a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:27 +0100 Subject: [PATCH 082/114] iio: accel: adxl355: Make timestamp 64-bit aligned using aligned_s64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1bb942287e05dc4c304a003ea85e6dd9a5e7db39 ] The IIO ABI requires 64-bit aligned timestamps. In this case insufficient padding would have been added on architectures where an s64 is only 32-bit aligned. Use aligned_s64 to enforce the correct alignment. Fixes: 327a0eaf19d5 ("iio: accel: adxl355: Add triggered buffer support") Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-5-jic23@kernel.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/adxl355_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index 0c9225d18fb2..4973e8da5399 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -231,7 +231,7 @@ struct adxl355_data { u8 transf_buf[3]; struct { u8 buf[14]; - s64 ts; + aligned_s64 ts; } buffer; } __aligned(IIO_DMA_MINALIGN); }; From 66c4ec15e3295e7e635995a1ad4f752fda2e851b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:26 +0100 Subject: [PATCH 083/114] iio: adc: dln2: Use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5097eaae98e53f9ab9d35801c70da819b92ca907 ] Here the lack of marking allows the overall structure to not be sufficiently aligned resulting in misplacement of the timestamp in iio_push_to_buffers_with_timestamp(). Use aligned_s64 to force the alignment on all architectures. Fixes: 7c0299e879dd ("iio: adc: Add support for DLN2 ADC") Reported-by: David Lechner Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-4-jic23@kernel.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/dln2-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 97d162a3cba4..49a2588e7431 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -483,7 +483,7 @@ static irqreturn_t dln2_adc_trigger_h(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct { __le16 values[DLN2_ADC_MAX_CHANNELS]; - int64_t timestamp_space; + aligned_s64 timestamp_space; } data; struct dln2_adc_get_all_vals dev_data; struct dln2_adc *dln2 = iio_priv(indio_dev); From 1c0620213f379a0d76952753681c60da91eb50a1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 27 Apr 2025 13:34:24 +0200 Subject: [PATCH 084/114] MIPS: Fix MAX_REG_OFFSET [ Upstream commit c44572e0cc13c9afff83fd333135a0aa9b27ba26 ] Fix MAX_REG_OFFSET to point to the last register in 'pt_regs' and not to the marker itself, which could allow regs_get_register() to return an invalid offset. Fixes: 40e084a506eb ("MIPS: Add uprobes support.") Suggested-by: Maciej W. Rozycki Signed-off-by: Thorsten Blum Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 4a2b40ce39e0..841612913f0d 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -65,7 +65,8 @@ static inline void instruction_pointer_set(struct pt_regs *regs, /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET \ + (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset From f5a7d616a5470318e875f4ddd3c4ec9083f4b2b4 Mon Sep 17 00:00:00 2001 From: Kevin Baker Date: Mon, 5 May 2025 12:02:56 -0500 Subject: [PATCH 085/114] drm/panel: simple: Update timings for AUO G101EVN010 [ Upstream commit 7c6fa1797a725732981f2d77711c867166737719 ] Switch to panel timings based on datasheet for the AUO G101EVN01.0 LVDS panel. Default timings were tested on the panel. Previous mode-based timings resulted in horizontal display shift. Signed-off-by: Kevin Baker Fixes: 4fb86404a977 ("drm/panel: simple: Add AUO G101EVN010 panel support") Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 37fe54c34b14..0d69098eddd9 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -979,27 +979,28 @@ static const struct panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; From eba09f42393abe8c70e78dd643166999b3144b75 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 2 May 2025 10:58:00 +0200 Subject: [PATCH 086/114] nvme: unblock ctrl state transition for firmware update [ Upstream commit 650415fca0a97472fdd79725e35152614d1aad76 ] The original nvme subsystem design didn't have a CONNECTING state; the state machine allowed transitions from RESETTING to LIVE directly. With the introduction of nvme fabrics the CONNECTING state was introduce. Over time the nvme-pci started to use the CONNECTING state as well. Eventually, a bug fix for the nvme-fc started to depend that the only valid transition to LIVE was from CONNECTING. Though this change didn't update the firmware update handler which was still depending on RESETTING to LIVE transition. The simplest way to address it for the time being is to switch into CONNECTING state before going to LIVE state. Fixes: d2fe192348f9 ("nvme: only allow entering LIVE from CONNECTING state") Reported-by: Guenter Roeck Signed-off-by: Daniel Wagner Closes: https://lore.kernel.org/all/0134ea15-8d5f-41f7-9e9a-d7e6d82accaa@roeck-us.net Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c6b0637e61de..6e2d0fda3ba4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4156,7 +4156,8 @@ static void nvme_fw_act_work(struct work_struct *work) msleep(100); } - if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) || + !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; nvme_unquiesce_io_queues(ctrl); From 00f0dd1a0166e1ed8adeb50a6e345b4fd7878431 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Apr 2025 23:56:14 -0400 Subject: [PATCH 087/114] do_umount(): add missing barrier before refcount checks in sync case [ Upstream commit 65781e19dcfcb4aed1167d87a3ffcc2a0c071d47 ] do_umount() analogue of the race fixed in 119e1ef80ecf "fix __legitimize_mnt()/mntput() race". Here we want to make sure that if __legitimize_mnt() doesn't notice our lock_mount_hash(), we will notice their refcount increment. Harder to hit than mntput_no_expire() one, fortunately, and consequences are milder (sync umount acting like umount -l on a rare race with RCU pathwalk hitting at just the wrong time instead of use-after-free galore mntput_no_expire() counterpart used to be hit). Still a bug... Fixes: 48a066e72d97 ("RCU'd vfsmounts") Reviewed-by: Christian Brauner Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5a885d35efe9..450f4198b8cd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -633,7 +633,7 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); - smp_mb(); // see mntput_no_expire() + smp_mb(); // see mntput_no_expire() and do_umount() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { @@ -1786,6 +1786,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { + smp_mb(); // paired with __legitimize_mnt() shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { From 51f1389b5fe1a1eb3c50d1806f8dcaaa34bdc466 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 May 2025 08:34:39 -0600 Subject: [PATCH 088/114] io_uring: always arm linked timeouts prior to issue Commit b53e523261bf058ea4a518b482222e7a277b186b upstream. There are a few spots where linked timeouts are armed, and not all of them adhere to the pre-arm, attempt issue, post-arm pattern. This can be problematic if the linked request returns that it will trigger a callback later, and does so before the linked timeout is fully armed. Consolidate all the linked timeout handling into __io_issue_sqe(), rather than have it spread throughout the various issue entry points. Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/1390 Reported-by: Chase Hiltz Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 53 ++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3ce93418e015..4f4ac40fc605 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -422,24 +422,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) return req->link; } -static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) -{ - if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) - return NULL; - return __io_prep_linked_timeout(req); -} - -static noinline void __io_arm_ltimeout(struct io_kiocb *req) -{ - io_queue_linked_timeout(__io_prep_linked_timeout(req)); -} - -static inline void io_arm_ltimeout(struct io_kiocb *req) -{ - if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) - __io_arm_ltimeout(req); -} - static void io_prep_async_work(struct io_kiocb *req) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; @@ -493,7 +475,6 @@ static void io_prep_async_link(struct io_kiocb *req) static void io_queue_iowq(struct io_kiocb *req) { - struct io_kiocb *link = io_prep_linked_timeout(req); struct io_uring_task *tctx = req->task->io_uring; BUG_ON(!tctx); @@ -518,8 +499,6 @@ static void io_queue_iowq(struct io_kiocb *req) trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); io_wq_enqueue(tctx->io_wq, &req->work); - if (link) - io_queue_linked_timeout(link); } static __cold void io_queue_deferred(struct io_ring_ctx *ctx) @@ -1863,17 +1842,24 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def, return !!req->file; } +#define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT) + static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; const struct cred *creds = NULL; + struct io_kiocb *link = NULL; int ret; if (unlikely(!io_assign_file(req, def, issue_flags))) return -EBADF; - if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds(req->creds); + if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { + if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) + creds = override_creds(req->creds); + if (req->flags & REQ_F_ARM_LTIMEOUT) + link = __io_prep_linked_timeout(req); + } if (!def->audit_skip) audit_uring_entry(req->opcode); @@ -1883,8 +1869,12 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (!def->audit_skip) audit_uring_exit(!ret, ret); - if (creds) - revert_creds(creds); + if (unlikely(creds || link)) { + if (creds) + revert_creds(creds); + if (link) + io_queue_linked_timeout(link); + } if (ret == IOU_OK) { if (issue_flags & IO_URING_F_COMPLETE_DEFER) @@ -1939,8 +1929,6 @@ void io_wq_submit_work(struct io_wq_work *work) else req_ref_get(req); - io_arm_ltimeout(req); - /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (work->flags & IO_WQ_WORK_CANCEL) { fail: @@ -2036,15 +2024,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd) static void io_queue_async(struct io_kiocb *req, int ret) __must_hold(&req->ctx->uring_lock) { - struct io_kiocb *linked_timeout; - if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { io_req_defer_failed(req, ret); return; } - linked_timeout = io_prep_linked_timeout(req); - switch (io_arm_poll_handler(req, 0)) { case IO_APOLL_READY: io_kbuf_recycle(req, 0); @@ -2057,9 +2041,6 @@ static void io_queue_async(struct io_kiocb *req, int ret) case IO_APOLL_OK: break; } - - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); } static inline void io_queue_sqe(struct io_kiocb *req) @@ -2073,9 +2054,7 @@ static inline void io_queue_sqe(struct io_kiocb *req) * We async punt it if the file wasn't marked NOWAIT, or if the file * doesn't support non-blocking read/write attempts */ - if (likely(!ret)) - io_arm_ltimeout(req); - else + if (unlikely(ret)) io_queue_async(req, ret); } From 746e7d285dcb96caa1845fbbb62b14bf4010cdfb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2025 08:07:09 -0600 Subject: [PATCH 089/114] io_uring: ensure deferred completions are posted for multishot Commit 687b2bae0efff9b25e071737d6af5004e6e35af5 upstream. Multishot normally uses io_req_post_cqe() to post completions, but when stopping it, it may finish up with a deferred completion. This is fine, except if another multishot event triggers before the deferred completions get flushed. If this occurs, then CQEs may get reordered in the CQ ring, and cause confusion on the application side. When multishot posting via io_req_post_cqe(), flush any pending deferred completions first, if any. Cc: stable@vger.kernel.org # 6.1+ Reported-by: Norman Maurer Reported-by: Christian Mazakas Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4f4ac40fc605..db592fa549b7 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -919,6 +919,14 @@ static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u { bool filled; + /* + * If multishot has already posted deferred completions, ensure that + * those are flushed first before posting this one. If not, CQEs + * could get reordered. + */ + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) + __io_submit_flush_completions(ctx); + io_cq_lock(ctx); filled = io_fill_cqe_aux(ctx, user_data, res, cflags); if (!filled && allow_overflow) From 054fc98d691a282661b8032cb3bdcba1027f2ac3 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 9 Dec 2021 15:12:19 +0000 Subject: [PATCH 090/114] arm64: insn: Add support for encoding DSB commit 63de8abd97ddb9b758bd8f915ecbd18e1f1a87a0 upstream. To generate code in the eBPF epilogue that uses the DSB instruction, insn.c needs a heler to encode the type and domain. Re-use the crm encoding logic from the DMB instruction. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/insn.h | 1 + arch/arm64/lib/insn.c | 76 +++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 0ccf51afde31..12c0278294e3 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -687,6 +687,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index a635ab83fee3..7232b1e70a12 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -5,6 +5,7 @@ * * Copyright (C) 2014-2016 Zi Shen Lim */ +#include #include #include #include @@ -1471,43 +1472,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } +static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type) +{ + switch (type) { + case AARCH64_INSN_MB_SY: + return 0xf; + case AARCH64_INSN_MB_ST: + return 0xe; + case AARCH64_INSN_MB_LD: + return 0xd; + case AARCH64_INSN_MB_ISH: + return 0xb; + case AARCH64_INSN_MB_ISHST: + return 0xa; + case AARCH64_INSN_MB_ISHLD: + return 0x9; + case AARCH64_INSN_MB_NSH: + return 0x7; + case AARCH64_INSN_MB_NSHST: + return 0x6; + case AARCH64_INSN_MB_NSHLD: + return 0x5; + default: + pr_err("%s: unknown barrier type %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } +} + u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) { u32 opt; u32 insn; - switch (type) { - case AARCH64_INSN_MB_SY: - opt = 0xf; - break; - case AARCH64_INSN_MB_ST: - opt = 0xe; - break; - case AARCH64_INSN_MB_LD: - opt = 0xd; - break; - case AARCH64_INSN_MB_ISH: - opt = 0xb; - break; - case AARCH64_INSN_MB_ISHST: - opt = 0xa; - break; - case AARCH64_INSN_MB_ISHLD: - opt = 0x9; - break; - case AARCH64_INSN_MB_NSH: - opt = 0x7; - break; - case AARCH64_INSN_MB_NSHST: - opt = 0x6; - break; - case AARCH64_INSN_MB_NSHLD: - opt = 0x5; - break; - default: - pr_err("%s: unknown dmb type %d\n", __func__, type); + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) return AARCH64_BREAK_FAULT; - } insn = aarch64_insn_get_dmb_value(); insn &= ~GENMASK(11, 8); @@ -1515,3 +1514,18 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) return insn; } + +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type) +{ + u32 opt, insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_dsb_base_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} From 854da0ed067165be7d1b5bd843b58c4ec897e9bd Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 19 Aug 2024 14:15:53 +0100 Subject: [PATCH 091/114] arm64: proton-pack: Expose whether the platform is mitigated by firmware commit e7956c92f396a44eeeb6eaf7a5b5e1ad24db6748 upstream. is_spectre_bhb_fw_affected() allows the caller to determine if the CPU is known to need a firmware mitigation. CPUs are either on the list of CPUs we know about, or firmware has been queried and reported that the platform is affected - and mitigated by firmware. This helper is not useful to determine if the platform is mitigated by firmware. A CPU could be on the know list, but the firmware may not be implemented. Its affected but not mitigated. spectre_bhb_enable_mitigation() handles this distinction by checking the firmware state before enabling the mitigation. Add a helper to expose this state. This will be used by the BPF JIT to determine if calling firmware for a mitigation is necessary and supported. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/proton-pack.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 0c2b47673922..f3e0f98d3bd2 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,7 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index edc4c727783d..7befcb545ac9 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1093,6 +1093,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) update_mitigation_state(&spectre_bhb_state, state); } +bool is_spectre_bhb_fw_mitigated(void) +{ + return test_bit(BHB_FW, &system_bhb_mitigations); +} + /* Patched to NOP when enabled */ void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, From 73591041a551cfe390861bf30067d3d2810b82ad Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Apr 2025 13:55:17 +0100 Subject: [PATCH 092/114] arm64: proton-pack: Expose whether the branchy loop k value commit a1152be30a043d2d4dcb1683415f328bf3c51978 upstream. Add a helper to expose the k value of the branchy loop. This is needed by the BPF JIT to generate the mitigation sequence in BPF programs. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/proton-pack.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f3e0f98d3bd2..da1998bc112a 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,7 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 get_spectre_bhb_loop_value(void); bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 7befcb545ac9..81c473540d0c 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -998,6 +998,11 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, return true; } +u8 get_spectre_bhb_loop_value(void) +{ + return max_bhb_k; +} + static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) { const char *v = arm64_get_bp_hardening_vector(slot); From 42a20cf51011788f04cf2adbcd7681f02bdb6c27 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 9 Dec 2021 15:13:24 +0000 Subject: [PATCH 093/114] arm64: bpf: Add BHB mitigation to the epilogue for cBPF programs commit 0dfefc2ea2f29ced2416017d7e5b1253a54c2735 upstream. A malicious BPF program may manipulate the branch history to influence what the hardware speculates will happen next. On exit from a BPF program, emit the BHB mititgation sequence. This is only applied for 'classic' cBPF programs that are loaded by seccomp. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Acked-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/proton-pack.c | 2 +- arch/arm64/net/bpf_jit_comp.c | 54 +++++++++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index da1998bc112a..32475d19c15f 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,7 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +extern bool __nospectre_bhb; u8 get_spectre_bhb_loop_value(void); bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 81c473540d0c..5fc2e9bdb386 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1020,7 +1020,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } -static bool __read_mostly __nospectre_bhb; +bool __read_mostly __nospectre_bhb; static int __init parse_spectre_bhb_param(char *str) { __nospectre_bhb = true; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 5074bd1d37b5..027775e6c9b1 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -653,7 +655,48 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +/* Clobbers BPF registers 1-4, aka x0-x3 */ +static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) +{ + const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */ + u8 k = get_spectre_bhb_loop_value(); + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + cpu_mitigations_off() || __nospectre_bhb || + arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) + return; + + if (supports_clearbhb(SCOPE_SYSTEM)) { + emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); + return; + } + + if (k) { + emit_a64_mov_i64(r1, k, ctx); + emit(A64_B(1), ctx); + emit(A64_SUBS_I(true, r1, r1, 1), ctx); + emit(A64_B_(A64_COND_NE, -2), ctx); + emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx); + emit(aarch64_insn_get_isb_value(), ctx); + } + + if (is_spectre_bhb_fw_mitigated()) { + emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR, + ARM_SMCCC_ARCH_WORKAROUND_3), ctx); + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + emit(aarch64_insn_get_hvc_value(), ctx); + break; + case SMCCC_CONDUIT_SMC: + emit(aarch64_insn_get_smc_value(), ctx); + break; + default: + pr_err_once("Firmware mitigation enabled with unknown conduit\n"); + } + } +} + +static void build_epilogue(struct jit_ctx *ctx, bool was_classic) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 r6 = bpf2a64[BPF_REG_6]; @@ -675,10 +718,13 @@ static void build_epilogue(struct jit_ctx *ctx) emit(A64_POP(r8, r9, A64_SP), ctx); emit(A64_POP(r6, r7, A64_SP), ctx); + if (was_classic) + build_bhb_mitigation(ctx); + /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Set return value */ + /* Move the return value from bpf:r0 (aka x7) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ @@ -1586,7 +1632,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1622,7 +1668,7 @@ skip_init_ctx: goto out_off; } - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); /* 3. Extra pass to validate JITed code. */ From 80251f62028f1ab2e09be5ca3123f84e8b00389a Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Apr 2025 16:03:38 +0100 Subject: [PATCH 094/114] arm64: bpf: Only mitigate cBPF programs loaded by unprivileged users commit f300769ead032513a68e4a02e806393402e626f8 upstream. Support for eBPF programs loaded by unprivileged users is typically disabled. This means only cBPF programs need to be mitigated for BHB. In addition, only mitigate cBPF programs that were loaded by an unprivileged user. Privileged users can also load the same program via eBPF, making the mitigation pointless. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Acked-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit_comp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 027775e6c9b1..75523c1be073 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -666,6 +666,9 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) return; + if (capable(CAP_SYS_ADMIN)) + return; + if (supports_clearbhb(SCOPE_SYSTEM)) { emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); return; From ca8a5626ca0c2ce98414495e8f722faf18b730fb Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 12 Aug 2024 17:50:22 +0100 Subject: [PATCH 095/114] arm64: proton-pack: Add new CPUs 'k' values for branch mitigation commit efe676a1a7554219eae0b0dcfe1e0cdcc9ef9aef upstream. Update the list of 'k' values for the branch mitigation from arm's website. Add the values for Cortex-X1C. The MIDR_EL1 value can be found here: https://developer.arm.com/documentation/101968/0002/Register-descriptions/AArch> Link: https://developer.arm.com/documentation/110280/2-0/?lang=en Signed-off-by: James Morse Reviewed-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/proton-pack.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 2a4e686e633c..8a6b7feca3e4 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -166,6 +167,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5fc2e9bdb386..28c48bc9c095 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -891,6 +891,7 @@ static u8 spectre_bhb_loop_affected(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), From 1afebfeaf1eb31eadf28ec6902ef6eda548c2cc7 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Mon, 5 May 2025 14:35:12 -0700 Subject: [PATCH 096/114] x86/bpf: Call branch history clearing sequence on exit commit d4e89d212d401672e9cdfe825d947ee3a9fbe3f5 upstream. Classic BPF programs have been identified as potential vectors for intra-mode Branch Target Injection (BTI) attacks. Classic BPF programs can be run by unprivileged users. They allow unprivileged code to execute inside the kernel. Attackers can use unprivileged cBPF to craft branch history in kernel mode that can influence the target of indirect branches. Introduce a branch history buffer (BHB) clearing sequence during the JIT compilation of classic BPF programs. The clearing sequence is the same as is used in previous mitigations to protect syscalls. Since eBPF programs already have their own mitigations in place, only insert the call on classic programs that aren't run by privileged users. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Daniel Borkmann Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a50c99e9b5c0..4cdc301952b9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1072,6 +1072,30 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) #define RESTORE_TAIL_CALL_CNT(stack) \ EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8) +static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, + struct bpf_prog *bpf_prog) +{ + u8 *prog = *pprog; + u8 *func; + + if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) { + /* The clearing sequence clobbers eax and ecx. */ + EMIT1(0x50); /* push rax */ + EMIT1(0x51); /* push rcx */ + ip += 2; + + func = (u8 *)clear_bhb_loop; + ip += x86_call_depth_emit_accounting(&prog, func); + + if (emit_call(&prog, func, ip)) + return -EINVAL; + EMIT1(0x59); /* pop rcx */ + EMIT1(0x58); /* pop rax */ + } + *pprog = prog; + return 0; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { @@ -1945,6 +1969,15 @@ emit_jmp: seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; + + if (bpf_prog_was_classic(bpf_prog) && + !capable(CAP_SYS_ADMIN)) { + u8 *ip = image + addrs[i - 1]; + + if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) + return -EINVAL; + } + pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); From a0ff7f679b5d212454d31e6d2ab0b61b5171567a Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Mon, 5 May 2025 14:35:12 -0700 Subject: [PATCH 097/114] x86/bpf: Add IBHF call at end of classic BPF commit 9f725eec8fc0b39bdc07dcc8897283c367c1a163 upstream. Classic BPF programs can be run by unprivileged users, allowing unprivileged code to execute inside the kernel. Attackers can use this to craft branch history in kernel mode that can influence the target of indirect branches. BHI_DIS_S provides user-kernel isolation of branch history, but cBPF can be used to bypass this protection by crafting branch history in kernel mode. To stop intra-mode attacks via cBPF programs, Intel created a new instruction Indirect Branch History Fence (IBHF). IBHF prevents the predicted targets of subsequent indirect branches from being influenced by branch history prior to the IBHF. IBHF is only effective while BHI_DIS_S is enabled. Add the IBHF instruction to cBPF jitted code's exit path. Add the new fence when the hardware mitigation is enabled (i.e., X86_FEATURE_CLEAR_BHB_HW is set) or after the software sequence (X86_FEATURE_CLEAR_BHB_LOOP) is being used in a virtual machine. Note that X86_FEATURE_CLEAR_BHB_HW and X86_FEATURE_CLEAR_BHB_LOOP are mutually exclusive, so the JIT compiler will only emit the new fence, not the SW sequence, when X86_FEATURE_CLEAR_BHB_HW is set. Hardware that enumerates BHI_NO basically has BHI_DIS_S protections always enabled, regardless of the value of BHI_DIS_S. Since BHI_DIS_S doesn't protect against intra-mode attacks, enumerate BHI bug on BHI_NO hardware as well. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Daniel Borkmann Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 9 ++++++--- arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a844110691f9..5bc7943aa515 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1476,9 +1476,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); - /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && - !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + /* + * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with + * BHI_NO still need to use the BHI mitigation to prevent Intra-mode + * attacks. When virtualized, eIBRS could be hidden, assume vulnerable. + */ + if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4cdc301952b9..2f93555a1745 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -37,6 +37,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT5(b1, b2, b3, b4, b5) \ + do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0) #define EMIT1_off32(b1, off) \ do { EMIT1(b1); EMIT(off, 4); } while (0) @@ -1092,6 +1094,23 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, EMIT1(0x59); /* pop rcx */ EMIT1(0x58); /* pop rax */ } + /* Insert IBHF instruction */ + if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && + cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || + (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW) && + IS_ENABLED(CONFIG_X86_64))) { + /* + * Add an Indirect Branch History Fence (IBHF). IBHF acts as a + * fence preventing branch history from before the fence from + * affecting indirect branches after the fence. This is + * specifically used in cBPF jitted code to prevent Intra-mode + * BHI attacks. The IBHF instruction is designed to be a NOP on + * hardware that doesn't need or support it. The REP and REX.W + * prefixes are required by the microcode, and they also ensure + * that the NOP is unlikely to be used in existing code. + */ + EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ + } *pprog = prog; return 0; } From a8a8826bf6559b1106699bc4e92ac4e15770301b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 5 May 2025 14:35:12 -0700 Subject: [PATCH 098/114] x86/bhi: Do not set BHI_DIS_S in 32-bit mode commit 073fdbe02c69c43fb7c0d547ec265c7747d4a646 upstream. With the possibility of intra-mode BHI via cBPF, complete mitigation for BHI is to use IBHF (history fence) instruction with BHI_DIS_S set. Since this new instruction is only available in 64-bit mode, setting BHI_DIS_S in 32-bit mode is only a partial mitigation. Do not set BHI_DIS_S in 32-bit mode so as to avoid reporting misleading mitigated status. With this change IBHF won't be used in 32-bit mode, also remove the CONFIG_X86_64 check from emit_spectre_bhb_barrier(). Suggested-by: Josh Poimboeuf Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +++-- arch/x86/net/bpf_jit_comp.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 78545f7e9cc6..51f58776765b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1677,10 +1677,11 @@ static void __init bhi_select_mitigation(void) return; } - if (spec_ctrl_bhi_dis()) + if (!IS_ENABLED(CONFIG_X86_64)) return; - if (!IS_ENABLED(CONFIG_X86_64)) + /* Mitigate in hardware if supported */ + if (spec_ctrl_bhi_dis()) return; /* Mitigate KVM by default */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2f93555a1745..5a3ce74d132b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1097,8 +1097,7 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, /* Insert IBHF instruction */ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || - (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW) && - IS_ENABLED(CONFIG_X86_64))) { + cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) { /* * Add an Indirect Branch History Fence (IBHF). IBHF acts as a * fence preventing branch history from before the fence from @@ -1108,6 +1107,8 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, * hardware that doesn't need or support it. The REP and REX.W * prefixes are required by the microcode, and they also ensure * that the NOP is unlikely to be used in existing code. + * + * IBHF is not a valid instruction in 32-bit mode. */ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ } From 2eecf5cf21cbb0c063671829d04dfb52a238ddfd Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 28 Feb 2025 18:35:43 -0800 Subject: [PATCH 099/114] x86/speculation: Simplify and make CALL_NOSPEC consistent commit cfceff8526a426948b53445c02bcb98453c7330d upstream. CALL_NOSPEC macro is used to generate Spectre-v2 mitigation friendly indirect branches. At compile time the macro defaults to indirect branch, and at runtime those can be patched to thunk based mitigations. This approach is opposite of what is done for the rest of the kernel, where the compile time default is to replace indirect calls with retpoline thunk calls. Make CALL_NOSPEC consistent with the rest of the kernel, default to retpoline thunk at compile time when CONFIG_RETPOLINE is enabled. [ pawan: s/CONFIG_MITIGATION_RETPOLINE/CONFIG_RETPOLINE/ ] Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250228-call-nospec-v3-1-96599fed0f33@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ee642d26e304..c57b9a106680 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -458,16 +458,11 @@ static inline void x86_set_skl_return_thunk(void) {} * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_RETPOLINE is defined. */ -# define CALL_NOSPEC \ - ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE, \ - "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_LFENCE) +#ifdef CONFIG_RETPOLINE +#define CALL_NOSPEC "call __x86_indirect_thunk_%V[thunk_target]\n" +#else +#define CALL_NOSPEC "call *%[thunk_target]\n" +#endif # define THUNK_TARGET(addr) [thunk_target] "r" (addr) From 0a90b50ebf24ccd94f61106b621e32a671f9b333 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 28 Feb 2025 18:35:58 -0800 Subject: [PATCH 100/114] x86/speculation: Add a conditional CS prefix to CALL_NOSPEC commit 052040e34c08428a5a388b85787e8531970c0c67 upstream. Retpoline mitigation for spectre-v2 uses thunks for indirect branches. To support this mitigation compilers add a CS prefix with -mindirect-branch-cs-prefix. For an indirect branch in asm, this needs to be added manually. CS prefix is already being added to indirect branches in asm files, but not in inline asm. Add CS prefix to CALL_NOSPEC for inline asm as well. There is no JMP_NOSPEC for inline asm. Reported-by: Josh Poimboeuf Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250228-call-nospec-v3-2-96599fed0f33@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c57b9a106680..53edb7cd8f22 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,9 +219,8 @@ .endm /* - * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call - * to the retpoline thunk with a CS prefix when the register requires - * a RAX prefix byte to encode. Also see apply_retpolines(). + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. */ .macro __CS_PREFIX reg:req .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 @@ -454,12 +453,24 @@ static inline void x86_set_skl_return_thunk(void) {} #ifdef CONFIG_X86_64 +/* + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. + */ +#define __CS_PREFIX(reg) \ + ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\rs," reg "\n" \ + ".byte 0x2e\n" \ + ".endif\n" \ + ".endr\n" + /* * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_RETPOLINE is defined. */ #ifdef CONFIG_RETPOLINE -#define CALL_NOSPEC "call __x86_indirect_thunk_%V[thunk_target]\n" +#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ + "call __x86_indirect_thunk_%V[thunk_target]\n" #else #define CALL_NOSPEC "call *%[thunk_target]\n" #endif From a42e9162314fbaad62eebe3001cd0520afdd11d0 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 20 Mar 2025 11:13:15 -0700 Subject: [PATCH 101/114] x86/speculation: Remove the extra #ifdef around CALL_NOSPEC commit c8c81458863ab686cda4fe1e603fccaae0f12460 upstream. Commit: 010c4a461c1d ("x86/speculation: Simplify and make CALL_NOSPEC consistent") added an #ifdef CONFIG_RETPOLINE around the CALL_NOSPEC definition. This is not required as this code is already under a larger #ifdef. Remove the extra #ifdef, no functional change. vmlinux size remains same before and after this change: CONFIG_RETPOLINE=y: text data bss dec hex filename 25434752 7342290 2301212 35078254 217406e vmlinux.before 25434752 7342290 2301212 35078254 217406e vmlinux.after # CONFIG_RETPOLINE is not set: text data bss dec hex filename 22943094 6214994 1550152 30708240 1d49210 vmlinux.before 22943094 6214994 1550152 30708240 1d49210 vmlinux.after [ pawan: s/CONFIG_MITIGATION_RETPOLINE/CONFIG_RETPOLINE/ ] Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Reviewed-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20250320-call-nospec-extra-ifdef-v1-1-d9b084d24820@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 53edb7cd8f22..7e4c91eb8c10 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -468,12 +468,8 @@ static inline void x86_set_skl_return_thunk(void) {} * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_RETPOLINE is defined. */ -#ifdef CONFIG_RETPOLINE #define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ "call __x86_indirect_thunk_%V[thunk_target]\n" -#else -#define CALL_NOSPEC "call *%[thunk_target]\n" -#endif # define THUNK_TARGET(addr) [thunk_target] "r" (addr) From c6c1319d19fc0bb4fe0e911249340176c2aa1c62 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 11 Apr 2025 15:36:38 -0700 Subject: [PATCH 102/114] Documentation: x86/bugs/its: Add ITS documentation commit 1ac116ce6468670eeda39345a5585df308243dca upstream. Add the admin-guide for Indirect Target Selection (ITS). Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../hw-vuln/indirect-target-selection.rst | 168 ++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/indirect-target-selection.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ff0b440ef2dc..d2caa390395e 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -22,3 +22,4 @@ are configurable at compile, boot or run time. srso gather_data_sampling reg-file-data-sampling + indirect-target-selection diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst new file mode 100644 index 000000000000..d9ca64108d23 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Indirect Target Selection (ITS) +=============================== + +ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were +released before Alder Lake. ITS may allow an attacker to control the prediction +of indirect branches and RETs located in the lower half of a cacheline. + +ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium). + +Scope of Impact +--------------- +- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be + predicted with unintended target corresponding to a branch in the guest. + +- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native + gadgets. + +- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect + branches may still be predicted with targets corresponding to direct branches + executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which + should be available via distro updates. Alternatively microcode can be + obtained from Intel's github repository [#f1]_. + +Affected CPUs +------------- +Below is the list of ITS affected CPUs [#f2]_ [#f3]_: + + ======================== ============ ==================== =============== + Common name Family_Model eIBRS Intra-mode BTI + Guest/Host Isolation + ======================== ============ ==================== =============== + SKYLAKE_X (step >= 6) 06_55H Affected Affected + ICELAKE_X 06_6AH Not affected Affected + ICELAKE_D 06_6CH Not affected Affected + ICELAKE_L 06_7EH Not affected Affected + TIGERLAKE_L 06_8CH Not affected Affected + TIGERLAKE 06_8DH Not affected Affected + KABYLAKE_L (step >= 12) 06_8EH Affected Affected + KABYLAKE (step >= 13) 06_9EH Affected Affected + COMETLAKE 06_A5H Affected Affected + COMETLAKE_L 06_A6H Affected Affected + ROCKETLAKE 06_A7H Not affected Affected + ======================== ============ ==================== =============== + +- All affected CPUs enumerate Enhanced IBRS feature. +- IBPB isolation is affected on all ITS affected CPUs, and need a microcode + update for mitigation. +- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden + Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the + host's affected status. +- Intel Atom CPUs are not affected by ITS. + +Mitigation +---------- +As only the indirect branches and RETs that have their last byte of instruction +in the lower half of the cacheline are vulnerable to ITS, the basic idea behind +the mitigation is to not allow indirect branches in the lower half. + +This is achieved by relying on existing retpoline support in the kernel, and in +compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly +added ITS-safe thunks. These safe thunks consists of indirect branch in the +second half of the cacheline. Not all retpoline sites are patched to thunks, if +a retpoline site is evaluated to be ITS-safe, it is replaced with an inline +indirect branch. + +Dynamic thunks +~~~~~~~~~~~~~~ +From a dynamically allocated pool of safe-thunks, each vulnerable site is +replaced with a new thunk, such that they get a unique address. This could +improve the branch prediction accuracy. Also, it is a defense-in-depth measure +against aliasing. + +Note, for simplicity, indirect branches in eBPF programs are always replaced +with a jump to a static thunk in __x86_indirect_its_thunk_array. If required, +in future this can be changed to use dynamic thunks. + +All vulnerable RETs are replaced with a static thunk, they do not use dynamic +thunks. This is because RETs get their prediction from RSB mostly that does not +depend on source address. RETs that underflow RSB may benefit from dynamic +thunks. But, RETs significantly outnumber indirect branches, and any benefit +from a unique source address could be outweighed by the increased icache +footprint and iTLB pressure. + +Retpoline +~~~~~~~~~ +Retpoline sequence also mitigates ITS-unsafe indirect branches. For this +reason, when retpoline is enabled, ITS mitigation only relocates the RETs to +safe thunks. Unless user requested the RSB-stuffing mitigation. + +RSB Stuffing +~~~~~~~~~~~~ +RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow +attacks. And it also mitigates RETs that are vulnerable to ITS. + +Mitigation in guests +^^^^^^^^^^^^^^^^^^^^ +All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration +and Family/Model of the guest. This is because eIBRS feature could be hidden +from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which +indicates that the guest is running on an unaffected host. + +To prevent guests from unnecessarily deploying the mitigation on unaffected +platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When +a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit +is not set by any hardware, but is **intended for VMMs to synthesize** it for +guests as per the host's affected status. + +Mitigation options +^^^^^^^^^^^^^^^^^^ +The ITS mitigation can be controlled using the "indirect_target_selection" +kernel parameter. The available options are: + + ======== =================================================================== + on (default) Deploy the "Aligned branch/return thunks" mitigation. + If spectre_v2 mitigation enables retpoline, aligned-thunks are only + deployed for the affected RET instructions. Retpoline mitigates + indirect branches. + + off Disable ITS mitigation. + + vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation + part of ITS. Otherwise, mitigation is not deployed. This option is + useful when host userspace is not in the threat model, and only + attacks from guest to host are considered. + + stuff Deploy RSB-fill mitigation when retpoline is also deployed. + Otherwise, deploy the default mitigation. When retpoline mitigation + is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates + ITS. + + force Force the ITS bug and deploy the default mitigation. + ======== =================================================================== + +Sysfs reporting +--------------- + +The sysfs file showing ITS mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection + +Note, microcode mitigation status is not reported in this file. + +The possible values in this file are: + +.. list-table:: + + * - Not affected + - The processor is not vulnerable. + * - Vulnerable + - System is vulnerable and no mitigation has been applied. + * - Vulnerable, KVM: Not affected + - System is vulnerable to intra-mode BTI, but not affected by eIBRS + guest/host isolation. + * - Mitigation: Aligned branch/return thunks + - The mitigation is enabled, affected indirect branches and RETs are + relocated to safe thunks. + * - Mitigation: Retpolines, Stuffing RSB + - The mitigation is enabled using retpoline and RSB stuffing. + +References +---------- +.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files + +.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html + +.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list From 195579752c2323215f3a00ce831aa18e1dd2c692 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 17:40:41 -0700 Subject: [PATCH 103/114] x86/its: Enumerate Indirect Target Selection (ITS) bug commit 159013a7ca18c271ff64192deb62a689b622d860 upstream. ITS bug in some pre-Alderlake Intel CPUs may allow indirect branches in the first half of a cache line get predicted to a target of a branch located in the second half of the cache line. Set X86_BUG_ITS on affected CPUs. Mitigation to follow in later commits. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 8 +++++ arch/x86/kernel/cpu/common.c | 58 +++++++++++++++++++++++------- arch/x86/kvm/x86.c | 4 ++- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 55d18eef6775..0fde2feb16c6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -518,4 +518,5 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_ITS X86_BUG(1*32 + 5) /* CPU is affected by Indirect Target Selection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 623bb48774d4..9fbad4cb971b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -180,6 +180,14 @@ * VERW clears CPU Register * File. */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5bc7943aa515..1b1df06f6c23 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1272,6 +1272,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define GDS BIT(6) /* CPU is affected by Register File Data Sampling */ #define RFDS BIT(7) +/* CPU is affected by Indirect Target Selection */ +#define ITS BIT(8) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1283,22 +1285,25 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x0, 0x5), MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xb), MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xc), MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS), VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), - VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS | ITS), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS | ITS), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED | ITS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS | ITS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS | ITS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), @@ -1362,6 +1367,32 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) return cpu_matches(cpu_vuln_blacklist, RFDS); } +static bool __init vulnerable_to_its(u64 x86_arch_cap_msr) +{ + /* The "immunity" bit trumps everything else: */ + if (x86_arch_cap_msr & ARCH_CAP_ITS_NO) + return false; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + /* None of the affected CPUs have BHI_CTRL */ + if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + /* + * If a VMM did not expose ITS_NO, assume that a guest could + * be running on a vulnerable hardware or may migrate to such + * hardware. + */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; + + if (cpu_matches(cpu_vuln_blacklist, ITS)) + return true; + + return false; +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); @@ -1489,6 +1520,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); + if (vulnerable_to_its(x86_arch_cap_msr)) + setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1eeb01afa40b..55185670e0e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1655,6 +1655,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_MDS_NO; if (!boot_cpu_has_bug(X86_BUG_RFDS)) data |= ARCH_CAP_RFDS_NO; + if (!boot_cpu_has_bug(X86_BUG_ITS)) + data |= ARCH_CAP_ITS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* From c5a5d8075231e59aa2ed8b3a2e80698f068a39d8 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 21:17:21 -0700 Subject: [PATCH 104/114] x86/its: Add support for ITS-safe indirect thunk commit 8754e67ad4ac692c67ff1f99c0d07156f04ae40c upstream. Due to ITS, indirect branches in the lower half of a cacheline may be vulnerable to branch target injection attack. Introduce ITS-safe thunks to patch indirect branches in the lower half of cacheline with the thunk. Also thunk any eBPF generated indirect branches in emit_indirect_jump(). Below category of indirect branches are not mitigated: - Indirect branches in the .init section are not mitigated because they are discarded after boot. - Indirect branches that are explicitly marked retpoline-safe. Note that retpoline also mitigates the indirect branches against ITS. This is because the retpoline sequence fills an RSB entry before RET, and it does not suffer from RSB-underflow part of the ITS. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 11 +++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 4 +++ arch/x86/kernel/alternative.c | 45 ++++++++++++++++++++++++++-- arch/x86/kernel/vmlinux.lds.S | 6 ++++ arch/x86/lib/retpoline.S | 28 +++++++++++++++++ arch/x86/net/bpf_jit_comp.c | 6 +++- 7 files changed, 97 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d874ea22512b..4372657ab0d6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2610,6 +2610,17 @@ config MITIGATION_SPECTRE_BHI indirect branches. See +config MITIGATION_ITS + bool "Enable Indirect Target Selection mitigation" + depends on CPU_SUP_INTEL && X86_64 + depends on RETPOLINE && RETHUNK + default y + help + Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in + BPU on some Intel CPUs that may allow Spectre V2 style attacks. If + disabled, mitigation cannot be enabled via cmdline. + See + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0fde2feb16c6..8cc47fc986e9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -468,6 +468,7 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 5) /* "" Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e4c91eb8c10..8633f7bff764 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -364,10 +364,14 @@ ".long 999b\n\t" \ ".popsection\n\t" +#define ITS_THUNK_SIZE 64 + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +typedef u8 its_thunk_t[ITS_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +extern its_thunk_t __x86_indirect_its_thunk_array[]; #ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aae7456ece07..9be01147727e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -521,7 +521,8 @@ static int emit_indirect(int op, int reg, u8 *bytes) return i; } -static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes, + void *call_dest, void *jmp_dest) { u8 op = insn->opcode.bytes[0]; int i = 0; @@ -542,7 +543,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 switch (op) { case CALL_INSN_OPCODE: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_call_thunk_array[reg], + call_dest, CALL_INSN_SIZE); i += CALL_INSN_SIZE; break; @@ -550,7 +551,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 case JMP32_INSN_OPCODE: clang_jcc: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_jump_thunk_array[reg], + jmp_dest, JMP32_INSN_SIZE); i += JMP32_INSN_SIZE; break; @@ -565,6 +566,35 @@ clang_jcc: return i; } +static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_call_thunk_array[reg], + __x86_indirect_jump_thunk_array[reg]); +} + +#ifdef CONFIG_MITIGATION_ITS +static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_its_thunk_array[reg], + __x86_indirect_its_thunk_array[reg]); +} + +/* Check if an indirect branch is at ITS-unsafe address */ +static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return false; + + /* Indirect branch opcode is 2 or 3 bytes depending on reg */ + addr += 1 + reg / 8; + + /* Lower-half of the cacheline? */ + return !(addr & 0x20); +} +#endif + /* * Rewrite the compiler generated retpoline thunk calls. * @@ -639,6 +669,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) bytes[i++] = 0xe8; /* LFENCE */ } +#ifdef CONFIG_MITIGATION_ITS + /* + * Check if the address of last byte of emitted-indirect is in + * lower-half of the cacheline. Such branches need ITS mitigation. + */ + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg)) + return emit_its_trampoline(addr, insn, reg, bytes); +#endif + ret = emit_indirect(op, reg, bytes + i); if (ret < 0) return ret; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 60eb8baa44d7..27323cda907a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -541,4 +541,10 @@ INIT_PER_CPU(irq_stack_backing_store); "SRSO function pair won't alias"); #endif +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline"); +. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart"); +. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); +#endif + #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index ffa51f392e17..40d17f27499f 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -360,6 +360,34 @@ SYM_FUNC_END(__x86_return_skl) #endif /* CONFIG_CALL_DEPTH_TRACKING */ +#ifdef CONFIG_MITIGATION_ITS + +.macro ITS_THUNK reg + +SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + jmp *%\reg + int3 + .align 32, 0xcc /* fill to the end of the line */ + .skip 32, 0xcc /* skip to the next upper half */ +.endm + +/* ITS mitigation requires thunks be aligned to upper half of cacheline */ +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(__x86_indirect_its_thunk_array) + +#define GEN(reg) ITS_THUNK reg +#include +#undef GEN + + .align 64, 0xcc +SYM_CODE_END(__x86_indirect_its_thunk_array) + +#endif + /* * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5a3ce74d132b..0d4218741e0d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -472,7 +472,11 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + if (IS_ENABLED(CONFIG_MITIGATION_ITS) && + cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { + OPTIMIZER_HIDE_VAR(reg); + emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { From 4754e29f43c63adb42da1300dea4d8c1ac121c98 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 21:17:21 -0700 Subject: [PATCH 105/114] x86/its: Add support for ITS-safe return thunk commit a75bf27fe41abe658c53276a0c486c4bf9adecfc upstream. RETs in the lower half of cacheline may be affected by ITS bug, specifically when the RSB-underflows. Use ITS-safe return thunk for such RETs. RETs that are not patched: - RET in retpoline sequence does not need to be patched, because the sequence itself fills an RSB before RET. - RET in Call Depth Tracking (CDT) thunks __x86_indirect_{call|jump}_thunk and call_depth_return_thunk are not patched because CDT by design prevents RSB-underflow. - RETs in .init section are not reachable after init. - RETs that are explicitly marked safe with ANNOTATE_UNRET_SAFE. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 14 ++++++++++++++ arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/kernel/alternative.c | 19 +++++++++++++++++-- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 4 ++++ arch/x86/lib/retpoline.S | 13 ++++++++++++- arch/x86/net/bpf_jit_comp.c | 2 +- 8 files changed, 57 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index cb9ce0f9e78e..7e879b521726 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -130,6 +130,20 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, } #endif +#if defined(CONFIG_RETHUNK) && defined(CONFIG_OBJTOOL) +extern bool cpu_wants_rethunk(void); +extern bool cpu_wants_rethunk_at(void *addr); +#else +static __always_inline bool cpu_wants_rethunk(void) +{ + return false; +} +static __always_inline bool cpu_wants_rethunk_at(void *addr) +{ + return false; +} +#endif + #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8633f7bff764..1668ae8b71bb 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -395,6 +395,12 @@ static inline void srso_return_thunk(void) {} static inline void srso_alias_return_thunk(void) {} #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_return_thunk(void); +#else +static inline void its_return_thunk(void) {} +#endif + extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9be01147727e..ecf105f37de7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -749,6 +749,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_RETHUNK +bool cpu_wants_rethunk(void) +{ + return cpu_feature_enabled(X86_FEATURE_RETHUNK); +} + +bool cpu_wants_rethunk_at(void *addr) +{ + if (!cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return false; + if (x86_return_thunk != its_return_thunk) + return true; + + return !((unsigned long)addr & 0x20); +} + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -765,7 +780,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) int i = 0; /* Patch the custom return thunks... */ - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk_at(addr)) { i = JMP32_INSN_SIZE; __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); } else { @@ -782,7 +797,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) static_call_force_reinit(); for (s = start; s < end; s++) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 12df54ff0e81..50f8c8a8483b 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -363,7 +363,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(ip)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 07961b362e2a..bcea882e022f 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -81,7 +81,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, break; case RET: - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(insn)) code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; @@ -90,7 +90,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case JCC: if (!func) { func = __static_call_return; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) func = x86_return_thunk; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 27323cda907a..c57d5df1abc6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -547,4 +547,8 @@ INIT_PER_CPU(irq_stack_backing_store); . = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); #endif +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline"); +#endif + #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 40d17f27499f..91ce427d5af1 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -386,7 +386,18 @@ SYM_CODE_START(__x86_indirect_its_thunk_array) .align 64, 0xcc SYM_CODE_END(__x86_indirect_its_thunk_array) -#endif +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(its_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(its_return_thunk) +EXPORT_SYMBOL(its_return_thunk) + +#endif /* CONFIG_MITIGATION_ITS */ /* * This function name is magical and is used by -mfunction-return=thunk-extern diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0d4218741e0d..9b7df9d7b22b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -498,7 +498,7 @@ static void emit_return(u8 **pprog, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk()) { emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ From f7ef7f6ccf2bc3909ef5e50cbe9edf2bac71e1bd Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 20:23:23 -0700 Subject: [PATCH 106/114] x86/its: Enable Indirect Target Selection mitigation commit f4818881c47fd91fcb6d62373c57c7844e3de1c0 upstream. Indirect Target Selection (ITS) is a bug in some pre-ADL Intel CPUs with eIBRS. It affects prediction of indirect branch and RETs in the lower half of cacheline. Due to ITS such branches may get wrongly predicted to a target of (direct or indirect) branch that is located in the upper half of the cacheline. Scope of impact =============== Guest/host isolation -------------------- When eIBRS is used for guest/host isolation, the indirect branches in the VMM may still be predicted with targets corresponding to branches in the guest. Intra-mode ---------- cBPF or other native gadgets can be used for intra-mode training and disclosure using ITS. User/kernel isolation --------------------- When eIBRS is enabled user/kernel isolation is not impacted. Indirect Branch Prediction Barrier (IBPB) ----------------------------------------- After an IBPB, indirect branches may be predicted with targets corresponding to direct branches which were executed prior to IBPB. This is mitigated by a microcode update. Add cmdline parameter indirect_target_selection=off|on|force to control the mitigation to relocate the affected branches to an ITS-safe thunk i.e. located in the upper half of cacheline. Also add the sysfs reporting. When retpoline mitigation is deployed, ITS safe-thunks are not needed, because retpoline sequence is already ITS-safe. Similarly, when call depth tracking (CDT) mitigation is deployed (retbleed=stuff), ITS safe return thunk is not used, as CDT prevents RSB-underflow. To not overcomplicate things, ITS mitigation is not supported with spectre-v2 lfence;jmp mitigation. Moreover, it is less practical to deploy lfence;jmp mitigation on ITS affected parts anyways. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + .../admin-guide/kernel-parameters.txt | 13 ++ arch/x86/include/asm/nospec-branch.h | 6 - arch/x86/kernel/cpu/bugs.c | 141 +++++++++++++++++- drivers/base/cpu.c | 3 + include/linux/cpu.h | 2 + 6 files changed, 156 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 657bdee28d84..0426ec112155 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -514,6 +514,7 @@ Description: information about CPUs heterogeneity. What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/gather_data_sampling + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 184f2f96f6a5..848b367ca95b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2060,6 +2060,18 @@ different crypto accelerators. This option can be used to achieve best performance for particular HW. + indirect_target_selection= [X86,Intel] Mitigation control for Indirect + Target Selection(ITS) bug in Intel CPUs. Updated + microcode is also required for a fix in IBPB. + + on: Enable mitigation (default). + off: Disable mitigation. + force: Force the ITS bug and deploy default + mitigation. + + For details see: + Documentation/admin-guide/hw-vuln/indirect-target-selection.rst + init= [KNL] Format: Run specified binary instead of /sbin/init as init @@ -3331,6 +3343,7 @@ expose users to several CPU vulnerabilities. Equivalent to: if nokaslr then kpti=0 [ARM64] gather_data_sampling=off [X86] + indirect_target_selection=off [X86] kvm.nx_huge_pages=off [X86] l1tf=off [X86] mds=off [X86] diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1668ae8b71bb..bc4fa6d09d29 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -421,11 +421,6 @@ extern void (*x86_return_thunk)(void); #ifdef CONFIG_CALL_DEPTH_TRACKING extern void __x86_return_skl(void); -static inline void x86_set_skl_return_thunk(void) -{ - x86_return_thunk = &__x86_return_skl; -} - #define CALL_DEPTH_ACCOUNT \ ALTERNATIVE("", \ __stringify(INCREMENT_CALL_DEPTH), \ @@ -438,7 +433,6 @@ DECLARE_PER_CPU(u64, __x86_stuffs_count); DECLARE_PER_CPU(u64, __x86_ctxsw_count); #endif #else -static inline void x86_set_skl_return_thunk(void) {} #define CALL_DEPTH_ACCOUNT "" diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 51f58776765b..df5edadf8bbe 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); static void __init srso_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init its_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -67,6 +68,14 @@ static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; +static void __init set_return_thunk(void *thunk) +{ + if (x86_return_thunk != __x86_return_thunk) + pr_warn("x86/bugs: return thunk changed\n"); + + x86_return_thunk = thunk; +} + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { @@ -175,6 +184,7 @@ void __init cpu_select_mitigations(void) */ srso_select_mitigation(); gds_select_mitigation(); + its_select_mitigation(); } /* @@ -1102,7 +1112,7 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - x86_return_thunk = retbleed_return_thunk; + set_return_thunk(retbleed_return_thunk); if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -1136,7 +1146,9 @@ do_cmd_auto: case RETBLEED_MITIGATION_STUFF: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); - x86_set_skl_return_thunk(); +#ifdef CONFIG_CALL_DEPTH_TRACKING + set_return_thunk(&__x86_return_skl); +#endif break; default: @@ -1170,6 +1182,114 @@ do_cmd_auto: pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } +#undef pr_fmt +#define pr_fmt(fmt) "ITS: " fmt + +enum its_mitigation_cmd { + ITS_CMD_OFF, + ITS_CMD_ON, +}; + +enum its_mitigation { + ITS_MITIGATION_OFF, + ITS_MITIGATION_ALIGNED_THUNKS, + ITS_MITIGATION_RETPOLINE_STUFF, +}; + +static const char * const its_strings[] = { + [ITS_MITIGATION_OFF] = "Vulnerable", + [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks", + [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB", +}; + +static enum its_mitigation its_mitigation __ro_after_init = ITS_MITIGATION_ALIGNED_THUNKS; + +static enum its_mitigation_cmd its_cmd __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_ITS) ? ITS_CMD_ON : ITS_CMD_OFF; + +static int __init its_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) { + pr_err("Mitigation disabled at compile time, ignoring option (%s)", str); + return 0; + } + + if (!strcmp(str, "off")) { + its_cmd = ITS_CMD_OFF; + } else if (!strcmp(str, "on")) { + its_cmd = ITS_CMD_ON; + } else if (!strcmp(str, "force")) { + its_cmd = ITS_CMD_ON; + setup_force_cpu_bug(X86_BUG_ITS); + } else { + pr_err("Ignoring unknown indirect_target_selection option (%s).", str); + } + + return 0; +} +early_param("indirect_target_selection", its_parse_cmdline); + +static void __init its_select_mitigation(void) +{ + enum its_mitigation_cmd cmd = its_cmd; + + if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) { + its_mitigation = ITS_MITIGATION_OFF; + return; + } + + /* Retpoline+CDT mitigates ITS, bail out */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + boot_cpu_has(X86_FEATURE_CALL_DEPTH)) { + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + goto out; + } + + /* Exit early to avoid irrelevant warnings */ + if (cmd == ITS_CMD_OFF) { + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (spectre_v2_enabled == SPECTRE_V2_NONE) { + pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (!IS_ENABLED(CONFIG_RETPOLINE) || !IS_ENABLED(CONFIG_RETHUNK)) { + pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) { + pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + pr_err("WARNING: ITS mitigation is not compatible with lfence mitigation\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + + switch (cmd) { + case ITS_CMD_OFF: + its_mitigation = ITS_MITIGATION_OFF; + break; + case ITS_CMD_ON: + its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) + setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + set_return_thunk(its_return_thunk); + break; + } +out: + pr_info("%s\n", its_strings[its_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -2607,10 +2727,10 @@ static void __init srso_select_mitigation(void) if (boot_cpu_data.x86 == 0x19) { setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); - x86_return_thunk = srso_alias_return_thunk; + set_return_thunk(srso_alias_return_thunk); } else { setup_force_cpu_cap(X86_FEATURE_SRSO); - x86_return_thunk = srso_return_thunk; + set_return_thunk(srso_return_thunk); } if (has_microcode) srso_mitigation = SRSO_MITIGATION_SAFE_RET; @@ -2794,6 +2914,11 @@ static ssize_t rfds_show_state(char *buf) return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); } +static ssize_t its_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && @@ -2976,6 +3101,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RFDS: return rfds_show_state(buf); + case X86_BUG_ITS: + return its_show_state(buf); + default: break; } @@ -3055,4 +3183,9 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib { return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); } + +ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITS); +} #endif diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index ef427ee787a9..a5cfc1bfad51 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -566,6 +566,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(indirect_target_selection); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -581,6 +582,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -597,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_indirect_target_selection.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index a7d91a167a8b..20db7fc0651f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,8 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 61bed1ddb2127501c0bb4cf967538b556755601b Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 18 Nov 2024 09:53:12 -0800 Subject: [PATCH 107/114] x86/its: Add "vmexit" option to skip mitigation on some CPUs commit 2665281a07e19550944e8354a2024635a7b2714a upstream. Ice Lake generation CPUs are not affected by guest/host isolation part of ITS. If a user is only concerned about KVM guests, they can now choose a new cmdline option "vmexit" that will not deploy the ITS mitigation when CPU is not affected by guest/host isolation. This saves the performance overhead of ITS mitigation on Ice Lake gen CPUs. When "vmexit" option selected, if the CPU is affected by ITS guest/host isolation, the default ITS mitigation is deployed. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 2 ++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 11 +++++++++++ arch/x86/kernel/cpu/common.c | 19 ++++++++++++------- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 848b367ca95b..30e48bcc18c9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2068,6 +2068,8 @@ off: Disable mitigation. force: Force the ITS bug and deploy default mitigation. + vmexit: Only deploy mitigation if CPU is affected by + guest/host isolation part of ITS. For details see: Documentation/admin-guide/hw-vuln/indirect-target-selection.rst diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8cc47fc986e9..8a2482651a6f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -520,4 +520,5 @@ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_ITS X86_BUG(1*32 + 5) /* CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 6) /* CPU is affected by ITS, VMX is not affected */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index df5edadf8bbe..beb34b716a4a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1188,16 +1188,19 @@ do_cmd_auto: enum its_mitigation_cmd { ITS_CMD_OFF, ITS_CMD_ON, + ITS_CMD_VMEXIT, }; enum its_mitigation { ITS_MITIGATION_OFF, + ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF, }; static const char * const its_strings[] = { [ITS_MITIGATION_OFF] = "Vulnerable", + [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected", [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks", [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB", }; @@ -1224,6 +1227,8 @@ static int __init its_parse_cmdline(char *str) } else if (!strcmp(str, "force")) { its_cmd = ITS_CMD_ON; setup_force_cpu_bug(X86_BUG_ITS); + } else if (!strcmp(str, "vmexit")) { + its_cmd = ITS_CMD_VMEXIT; } else { pr_err("Ignoring unknown indirect_target_selection option (%s).", str); } @@ -1278,6 +1283,12 @@ static void __init its_select_mitigation(void) case ITS_CMD_OFF: its_mitigation = ITS_MITIGATION_OFF; break; + case ITS_CMD_VMEXIT: + if (boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY)) { + its_mitigation = ITS_MITIGATION_VMEXIT_ONLY; + goto out; + } + fallthrough; case ITS_CMD_ON: its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1b1df06f6c23..067e31fb9e16 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1274,6 +1274,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define RFDS BIT(7) /* CPU is affected by Indirect Target Selection */ #define ITS BIT(8) +/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ +#define ITS_NATIVE_ONLY BIT(9) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1294,16 +1296,16 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xc), MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS), VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS | ITS), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS | ITS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED | ITS), VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS | ITS), - VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS | ITS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), @@ -1520,8 +1522,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); - if (vulnerable_to_its(x86_arch_cap_msr)) + if (vulnerable_to_its(x86_arch_cap_msr)) { setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY)) + setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); + } if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From ba1d70362658bfe89fba8275bf80782d5a8da433 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 2 Dec 2024 12:07:08 -0800 Subject: [PATCH 108/114] x86/its: Add support for RSB stuffing mitigation commit facd226f7e0c8ca936ac114aba43cb3e8b94e41e upstream. When retpoline mitigation is enabled for spectre-v2, enabling call-depth-tracking and RSB stuffing also mitigates ITS. Add cmdline option indirect_target_selection=stuff to allow enabling RSB stuffing mitigation. When retpoline mitigation is not enabled, =stuff option is ignored, and default mitigation for ITS is deployed. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 3 +++ arch/x86/kernel/cpu/bugs.c | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 30e48bcc18c9..f95734ceb82b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2070,6 +2070,9 @@ mitigation. vmexit: Only deploy mitigation if CPU is affected by guest/host isolation part of ITS. + stuff: Deploy RSB-fill mitigation when retpoline is + also deployed. Otherwise, deploy the default + mitigation. For details see: Documentation/admin-guide/hw-vuln/indirect-target-selection.rst diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index beb34b716a4a..07b45bbf6348 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1189,6 +1189,7 @@ enum its_mitigation_cmd { ITS_CMD_OFF, ITS_CMD_ON, ITS_CMD_VMEXIT, + ITS_CMD_RSB_STUFF, }; enum its_mitigation { @@ -1229,6 +1230,8 @@ static int __init its_parse_cmdline(char *str) setup_force_cpu_bug(X86_BUG_ITS); } else if (!strcmp(str, "vmexit")) { its_cmd = ITS_CMD_VMEXIT; + } else if (!strcmp(str, "stuff")) { + its_cmd = ITS_CMD_RSB_STUFF; } else { pr_err("Ignoring unknown indirect_target_selection option (%s).", str); } @@ -1279,6 +1282,12 @@ static void __init its_select_mitigation(void) goto out; } + if (cmd == ITS_CMD_RSB_STUFF && + (!boot_cpu_has(X86_FEATURE_RETPOLINE) || !IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING))) { + pr_err("RSB stuff mitigation not supported, using default\n"); + cmd = ITS_CMD_ON; + } + switch (cmd) { case ITS_CMD_OFF: its_mitigation = ITS_MITIGATION_OFF; @@ -1296,6 +1305,18 @@ static void __init its_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); set_return_thunk(its_return_thunk); break; + case ITS_CMD_RSB_STUFF: + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); +#ifdef CONFIG_CALL_DEPTH_TRACKING + set_return_thunk(&__x86_return_skl); +#endif + if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) { + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; + pr_info("Retbleed mitigation updated to stuffing\n"); + } + break; } out: pr_info("%s\n", its_strings[its_mitigation]); From 9e7364c32c6cfa6d5765fb0d304da21f7c80c47c Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 2 May 2025 06:25:19 -0700 Subject: [PATCH 109/114] x86/its: Align RETs in BHB clear sequence to avoid thunking commit f0cd7091cc5a032c8870b4285305d9172569d126 upstream. The software mitigation for BHI is to execute BHB clear sequence at syscall entry, and possibly after a cBPF program. ITS mitigation thunks RETs in the lower half of the cacheline. This causes the RETs in the BHB clear sequence to be thunked as well, adding unnecessary branches to the BHB clear sequence. Since the sequence is in hot path, align the RET instructions in the sequence to avoid thunking. This is how disassembly clear_bhb_loop() looks like after this change: 0x44 <+4>: mov $0x5,%ecx 0x49 <+9>: call 0xffffffff81001d9b 0x4e <+14>: jmp 0xffffffff81001de5 0x53 <+19>: int3 ... 0x9b <+91>: call 0xffffffff81001dce 0xa0 <+96>: ret 0xa1 <+97>: int3 ... 0xce <+142>: mov $0x5,%eax 0xd3 <+147>: jmp 0xffffffff81001dd6 0xd5 <+149>: nop 0xd6 <+150>: sub $0x1,%eax 0xd9 <+153>: jne 0xffffffff81001dd3 0xdb <+155>: sub $0x1,%ecx 0xde <+158>: jne 0xffffffff81001d9b 0xe0 <+160>: ret 0xe1 <+161>: int3 0xe2 <+162>: int3 0xe3 <+163>: int3 0xe4 <+164>: int3 0xe5 <+165>: lfence 0xe8 <+168>: pop %rbp 0xe9 <+169>: ret Suggested-by: Andrew Cooper Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2192b6c33ea0..1f9e508ac075 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1569,7 +1569,9 @@ SYM_CODE_END(rewind_stack_and_make_dead) * ORC to unwind properly. * * The alignment is for performance and not for safety, and may be safely - * refactored in the future if needed. + * refactored in the future if needed. The .skips are for safety, to ensure + * that all RETs are in the second half of a cacheline to mitigate Indirect + * Target Selection, rather than taking the slowpath via its_return_thunk. */ SYM_FUNC_START(clear_bhb_loop) push %rbp @@ -1579,10 +1581,22 @@ SYM_FUNC_START(clear_bhb_loop) call 1f jmp 5f .align 64, 0xcc + /* + * Shift instructions so that the RET is in the upper half of the + * cacheline and don't take the slowpath to its_return_thunk. + */ + .skip 32 - (.Lret1 - 1f), 0xcc ANNOTATE_INTRA_FUNCTION_CALL 1: call 2f - RET +.Lret1: RET .align 64, 0xcc + /* + * As above shift instructions for RET at .Lret2 as well. + * + * This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc + * but some Clang versions (e.g. 18) don't like this. + */ + .skip 32 - 18, 0xcc 2: movl $5, %eax 3: jmp 4f nop @@ -1590,7 +1604,7 @@ SYM_FUNC_START(clear_bhb_loop) jnz 3b sub $1, %ecx jnz 1b - RET +.Lret2: RET 5: lfence pop %rbp RET From 6699bf27a47156baafde7e3f4a732aab2ad9f8cb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Sat, 3 May 2025 09:46:31 -0700 Subject: [PATCH 110/114] x86/ibt: Keep IBT disabled during alternative patching commit ebebe30794d38c51f71fe4951ba6af4159d9837d upstream. cfi_rewrite_callers() updates the fineIBT hash matching at the caller side, but except for paranoid-mode it relies on apply_retpoline() and friends for any ENDBR relocation. This could temporarily cause an indirect branch to land on a poisoned ENDBR. For instance, with para-virtualization enabled, a simple wrmsrl() could have an indirect branch pointing to native_write_msr() who's ENDBR has been relocated due to fineIBT: : push %rbp mov %rsp,%rbp mov %esi,%eax mov %rsi,%rdx shr $0x20,%rdx mov %edi,%edi mov %rax,%rsi call *0x21e65d0(%rip) # ^^^^^^^^^^^^^^^^^^^^^^^ Such an indirect call during the alternative patching could #CP if the caller is not *yet* adjusted for the new target ENDBR. To prevent a false #CP, keep CET-IBT disabled until all callers are patched. Patching during the module load does not need to be guarded by IBT-disable because the module code is not executed until the patching is complete. [ pawan: Since apply_paravirt() happens before __apply_fineibt() relocates the ENDBR, pv_ops in the example above is not relevant. It is still safer to keep this commit because missing an ENDBR means an oops. ] Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ecf105f37de7..a179cfff3903 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -30,6 +30,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -1629,6 +1630,8 @@ static noinline void __init alt_reloc_selftest(void) void __init alternative_instructions(void) { + u64 ibt; + int3_selftest(); /* @@ -1666,6 +1669,9 @@ void __init alternative_instructions(void) */ paravirt_set_cap(); + /* Keep CET-IBT disabled until caller/callee are patched */ + ibt = ibt_save(/*disable*/ true); + /* * First patch paravirt functions, such that we overwrite the indirect * call with the direct call. @@ -1699,6 +1705,8 @@ void __init alternative_instructions(void) */ apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + ibt_restore(ibt); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { From 3b2234cd50a9b4ed664324054901b6f763890104 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Oct 2024 10:05:48 -0700 Subject: [PATCH 111/114] x86/its: Use dynamic thunks for indirect branches commit 872df34d7c51a79523820ea6a14860398c639b87 upstream. ITS mitigation moves the unsafe indirect branches to a safe thunk. This could degrade the prediction accuracy as the source address of indirect branches becomes same for different execution paths. To improve the predictions, and hence the performance, assign a separate thunk for each indirect callsite. This is also a defense-in-depth measure to avoid indirect branches aliasing with each other. As an example, 5000 dynamic thunks would utilize around 16 bits of the address space, thereby gaining entropy. For a BTB that uses 32 bits for indexing, dynamic thunks could provide better prediction accuracy over fixed thunks. Have ITS thunks be variable sized and use EXECMEM_MODULE_TEXT such that they are both more flexible (got to extend them later) and live in 2M TLBs, just like kernel code, avoiding undue TLB pressure. [ pawan: CONFIG_EXECMEM and CONFIG_EXECMEM_ROX are not supported on backport kernel, made changes to use module_alloc() and set_memory_*() for dynamic thunks. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 10 +++ arch/x86/kernel/alternative.c | 132 ++++++++++++++++++++++++++++- arch/x86/kernel/module.c | 7 ++ include/linux/module.h | 5 ++ 4 files changed, 151 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7e879b521726..7a27a9e1df59 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -130,6 +130,16 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, } #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_init_mod(struct module *mod); +extern void its_fini_mod(struct module *mod); +extern void its_free_mod(struct module *mod); +#else /* CONFIG_MITIGATION_ITS */ +static inline void its_init_mod(struct module *mod) { } +static inline void its_fini_mod(struct module *mod) { } +static inline void its_free_mod(struct module *mod) { } +#endif + #if defined(CONFIG_RETHUNK) && defined(CONFIG_OBJTOOL) extern bool cpu_wants_rethunk(void); extern bool cpu_wants_rethunk_at(void *addr); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a179cfff3903..6085919d3b3e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include #include @@ -31,6 +33,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -124,6 +127,125 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = #endif }; +#ifdef CONFIG_MITIGATION_ITS + +static struct module *its_mod; +static void *its_page; +static unsigned int its_offset; + +/* Initialize a thunk with the "jmp *reg; int3" instructions. */ +static void *its_init_thunk(void *thunk, int reg) +{ + u8 *bytes = thunk; + int i = 0; + + if (reg >= 8) { + bytes[i++] = 0x41; /* REX.B prefix */ + reg -= 8; + } + bytes[i++] = 0xff; + bytes[i++] = 0xe0 + reg; /* jmp *reg */ + bytes[i++] = 0xcc; + + return thunk; +} + +void its_init_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + mutex_lock(&text_mutex); + its_mod = mod; + its_page = NULL; +} + +void its_fini_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + WARN_ON_ONCE(its_mod != mod); + + its_mod = NULL; + its_page = NULL; + mutex_unlock(&text_mutex); + + for (int i = 0; i < mod->its_num_pages; i++) { + void *page = mod->its_page_array[i]; + set_memory_rox((unsigned long)page, 1); + } +} + +void its_free_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + for (int i = 0; i < mod->its_num_pages; i++) { + void *page = mod->its_page_array[i]; + module_memfree(page); + } + kfree(mod->its_page_array); +} + +DEFINE_FREE(its_execmem, void *, if (_T) module_memfree(_T)); + +static void *its_alloc(void) +{ + void *page __free(its_execmem) = module_alloc(PAGE_SIZE); + + if (!page) + return NULL; + + if (its_mod) { + void *tmp = krealloc(its_mod->its_page_array, + (its_mod->its_num_pages+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + its_mod->its_page_array = tmp; + its_mod->its_page_array[its_mod->its_num_pages++] = page; + } + + return no_free_ptr(page); +} + +static void *its_allocate_thunk(int reg) +{ + int size = 3 + (reg / 8); + void *thunk; + + if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) { + its_page = its_alloc(); + if (!its_page) { + pr_err("ITS page allocation failed\n"); + return NULL; + } + memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE); + its_offset = 32; + } + + /* + * If the indirect branch instruction will be in the lower half + * of a cacheline, then update the offset to reach the upper half. + */ + if ((its_offset + size - 1) % 64 < 32) + its_offset = ((its_offset - 1) | 0x3F) + 33; + + thunk = its_page + its_offset; + its_offset += size; + + set_memory_rw((unsigned long)its_page, 1); + thunk = its_init_thunk(thunk, reg); + set_memory_rox((unsigned long)its_page, 1); + + return thunk; +} + +#endif + /* * Fill the buffer with a single effective instruction of size @len. * @@ -577,9 +699,13 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 #ifdef CONFIG_MITIGATION_ITS static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) { - return __emit_trampoline(addr, insn, bytes, - __x86_indirect_its_thunk_array[reg], - __x86_indirect_its_thunk_array[reg]); + u8 *thunk = __x86_indirect_its_thunk_array[reg]; + u8 *tmp = its_allocate_thunk(reg); + + if (tmp) + thunk = tmp; + + return __emit_trampoline(addr, insn, bytes, thunk, thunk); } /* Check if an indirect branch is at ITS-unsafe address */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 5f71a0cf4399..cfb7163cf90e 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -312,6 +312,9 @@ int module_finalize(const Elf_Ehdr *hdr, void *pseg = (void *)para->sh_addr; apply_paravirt(pseg, pseg + para->sh_size); } + + its_init_mod(me); + if (retpolines || cfi) { void *rseg = NULL, *cseg = NULL; unsigned int rsize = 0, csize = 0; @@ -332,6 +335,9 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + + its_fini_mod(me); + if (returns) { void *rseg = (void *)returns->sh_addr; apply_returns(rseg, rseg + returns->sh_size); @@ -379,4 +385,5 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); + its_free_mod(mod); } diff --git a/include/linux/module.h b/include/linux/module.h index f2a8624eef1e..f58d1eb260fa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -572,6 +572,11 @@ struct module { atomic_t refcnt; #endif +#ifdef CONFIG_MITIGATION_ITS + int its_num_pages; + void **its_page_array; +#endif + #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; From 9f69fe3888f643b16c35c8583036e94c5322ca61 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 May 2025 19:58:39 -0700 Subject: [PATCH 112/114] x86/its: Fix build errors when CONFIG_MODULES=n commit 9f35e33144ae5377d6a8de86dd3bd4d995c6ac65 upstream. Fix several build errors when CONFIG_MODULES=n, including the following: ../arch/x86/kernel/alternative.c:195:25: error: incomplete definition of type 'struct module' 195 | for (int i = 0; i < mod->its_num_pages; i++) { Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Dave Hansen Tested-by: Steven Rostedt (Google) Reviewed-by: Alexandre Chartre Signed-off-by: Linus Torvalds [ pawan: backport: Bring ITS dynamic thunk code under CONFIG_MODULES ] Signed-off-by: Pawan Gupta Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6085919d3b3e..c6d9a3882ec8 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -129,6 +129,7 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = #ifdef CONFIG_MITIGATION_ITS +#ifdef CONFIG_MODULES static struct module *its_mod; static void *its_page; static unsigned int its_offset; @@ -244,7 +245,16 @@ static void *its_allocate_thunk(int reg) return thunk; } -#endif +#else /* CONFIG_MODULES */ + +static void *its_allocate_thunk(int reg) +{ + return NULL; +} + +#endif /* CONFIG_MODULES */ + +#endif /* CONFIG_MITIGATION_ITS */ /* * Fill the buffer with a single effective instruction of size @len. From 772934d9062a0f7297ad4e5bffbd904208655660 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2025 09:57:31 +0200 Subject: [PATCH 113/114] x86/its: FineIBT-paranoid vs ITS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e52c1dc7455d32c8a55f9949d300e5e87d011fa6 upstream. FineIBT-paranoid was using the retpoline bytes for the paranoid check, disabling retpolines, because all parts that have IBT also have eIBRS and thus don't need no stinking retpolines. Except... ITS needs the retpolines for indirect calls must not be in the first half of a cacheline :-/ So what was the paranoid call sequence: : 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d a: 4d 8d 5b lea -0x10(%r11), %r11 e: 75 fd jne d 10: 41 ff d3 call *%r11 13: 90 nop Now becomes: : 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d a: 4d 8d 5b f0 lea -0x10(%r11), %r11 e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 Where the paranoid_thunk looks like: 1d: (bad) __x86_indirect_paranoid_thunk_r11: 1e: 75 fd jne 1d __x86_indirect_its_thunk_r11: 20: 41 ff eb jmp *%r11 23: cc int3 [ dhansen: remove initialization to false ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre [ Just a portion of the original commit, in order to fix a build issue in stable kernels due to backports ] Tested-by: Holger Hoffstätte Link: https://lore.kernel.org/r/20250514113952.GB16434@noisy.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 8 ++++++++ arch/x86/kernel/alternative.c | 7 +++++++ arch/x86/net/bpf_jit_comp.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7a27a9e1df59..6740b839153a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -5,6 +5,7 @@ #include #include #include +#include #define ALT_FLAGS_SHIFT 16 @@ -134,10 +135,17 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, extern void its_init_mod(struct module *mod); extern void its_fini_mod(struct module *mod); extern void its_free_mod(struct module *mod); +extern u8 *its_static_thunk(int reg); #else /* CONFIG_MITIGATION_ITS */ static inline void its_init_mod(struct module *mod) { } static inline void its_fini_mod(struct module *mod) { } static inline void its_free_mod(struct module *mod) { } +static inline u8 *its_static_thunk(int reg) +{ + WARN_ONCE(1, "ITS not compiled in"); + + return NULL; +} #endif #if defined(CONFIG_RETHUNK) && defined(CONFIG_OBJTOOL) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c6d9a3882ec8..4817e424d696 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1449,6 +1449,13 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, static void poison_cfi(void *addr) { } #endif +u8 *its_static_thunk(int reg) +{ + u8 *thunk = __x86_indirect_its_thunk_array[reg]; + + return thunk; +} + #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9b7df9d7b22b..07592eef253c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -475,7 +475,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) if (IS_ENABLED(CONFIG_MITIGATION_ITS) && cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { OPTIMIZER_HIDE_VAR(reg); - emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip); + emit_jump(&prog, its_static_thunk(reg), ip); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); From 615b9e10e3377467ced8f50592a1b5ba8ce053d8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 18 May 2025 08:24:12 +0200 Subject: [PATCH 114/114] Linux 6.6.91 Link: https://lore.kernel.org/r/20250512172027.691520737@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Mark Brown Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Peter Schneider Tested-by: Shuah Khan Tested-by: Hardik Garg Link: https://lore.kernel.org/r/20250514125617.240903002@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Mark Brown Tested-by: Florian Fainelli Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 587a1586e76d..a6a1942e2d00 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 90 +SUBLEVEL = 91 EXTRAVERSION = NAME = Pinguïn Aangedreven