From 86225a63467a07047487f37c877277af20a9bf85 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 25 Aug 2023 00:19:46 +0300 Subject: [PATCH 01/86] arm64: dts: qcom: msm8998: switch USB QMP PHY to new style of bindings [ Upstream commit b7efebfeb2e8ad8187cdabba5f0212ba2e6c1069 ] Change the USB QMP PHY to use newer style of QMP PHY bindings (single resource region, no per-PHY subnodes). Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230824211952.1397699-11-dmitry.baryshkov@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 0046325ae520 ("arm64: dts: qcom: msm8998: Disable SS instance in Parkmode for USB") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8998.dtsi | 35 +++++++++++---------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 3d4941dc31d7..c19fb8ae2da2 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -2029,7 +2029,7 @@ interrupts = ; snps,dis_u2_susphy_quirk; snps,dis_enblslpm_quirk; - phys = <&qusb2phy>, <&usb1_ssphy>; + phys = <&qusb2phy>, <&usb3phy>; phy-names = "usb2-phy", "usb3-phy"; snps,has-lpm-erratum; snps,hird-threshold = /bits/ 8 <0x10>; @@ -2038,33 +2038,26 @@ usb3phy: phy@c010000 { compatible = "qcom,msm8998-qmp-usb3-phy"; - reg = <0x0c010000 0x18c>; - status = "disabled"; - #address-cells = <1>; - #size-cells = <1>; - ranges; + reg = <0x0c010000 0x1000>; clocks = <&gcc GCC_USB3_PHY_AUX_CLK>, + <&gcc GCC_USB3_CLKREF_CLK>, <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>, - <&gcc GCC_USB3_CLKREF_CLK>; - clock-names = "aux", "cfg_ahb", "ref"; + <&gcc GCC_USB3_PHY_PIPE_CLK>; + clock-names = "aux", + "ref", + "cfg_ahb", + "pipe"; + clock-output-names = "usb3_phy_pipe_clk_src"; + #clock-cells = <0>; + #phy-cells = <0>; resets = <&gcc GCC_USB3_PHY_BCR>, <&gcc GCC_USB3PHY_PHY_BCR>; - reset-names = "phy", "common"; + reset-names = "phy", + "phy_phy"; - usb1_ssphy: phy@c010200 { - reg = <0xc010200 0x128>, - <0xc010400 0x200>, - <0xc010c00 0x20c>, - <0xc010600 0x128>, - <0xc010800 0x200>; - #phy-cells = <0>; - #clock-cells = <0>; - clocks = <&gcc GCC_USB3_PHY_PIPE_CLK>; - clock-names = "pipe0"; - clock-output-names = "usb3_phy_pipe_clk_src"; - }; + status = "disabled"; }; qusb2phy: phy@c012000 { From a7bbf4367ebef1732e70f147a8bd25e9913f0d58 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Thu, 4 Jul 2024 20:58:43 +0530 Subject: [PATCH 02/86] arm64: dts: qcom: msm8998: Disable SS instance in Parkmode for USB [ Upstream commit 0046325ae52079b46da13a7f84dd7b2a6f7c38f8 ] For Gen-1 targets like MSM8998, it is seen that stressing out the controller in host mode results in HC died error: xhci-hcd.12.auto: xHCI host not responding to stop endpoint command xhci-hcd.12.auto: xHCI host controller not responding, assume dead xhci-hcd.12.auto: HC died; cleaning up And at this instant only restarting the host mode fixes it. Disable SuperSpeed instance in park mode for MSM8998 to mitigate this issue. Cc: stable@vger.kernel.org Fixes: 026dad8f5873 ("arm64: dts: qcom: msm8998: Add USB-related nodes") Signed-off-by: Krishna Kurapati Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240704152848.3380602-4-quic_kriskura@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8998.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index c19fb8ae2da2..4de9ff045ff5 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -2029,6 +2029,7 @@ interrupts = ; snps,dis_u2_susphy_quirk; snps,dis_enblslpm_quirk; + snps,parkmode-disable-ss-quirk; phys = <&qusb2phy>, <&usb3phy>; phy-names = "usb2-phy", "usb3-phy"; snps,has-lpm-erratum; From 06078665376d9a99c718b0fed280817a9fb83a32 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Thu, 4 Jul 2024 20:58:42 +0530 Subject: [PATCH 03/86] arm64: dts: qcom: ipq8074: Disable SS instance in Parkmode for USB [ Upstream commit dc6ba95c6c4400a84cca5b419b34ae852a08cfb5 ] For Gen-1 targets like IPQ8074, it is seen that stressing out the controller in host mode results in HC died error: xhci-hcd.12.auto: xHCI host not responding to stop endpoint command xhci-hcd.12.auto: xHCI host controller not responding, assume dead xhci-hcd.12.auto: HC died; cleaning up And at this instant only restarting the host mode fixes it. Disable SuperSpeed instance in park mode for IPQ8074 to mitigate this issue. Cc: stable@vger.kernel.org Fixes: 5e09bc51d07b ("arm64: dts: ipq8074: enable USB support") Signed-off-by: Krishna Kurapati Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240704152848.3380602-3-quic_kriskura@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 3d8e5ba51ce0..2f53c099634b 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -593,6 +593,7 @@ interrupts = ; phys = <&qusb_phy_0>, <&usb0_ssphy>; phy-names = "usb2-phy", "usb3-phy"; + snps,parkmode-disable-ss-quirk; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; @@ -635,6 +636,7 @@ interrupts = ; phys = <&qusb_phy_1>, <&usb1_ssphy>; phy-names = "usb2-phy", "usb3-phy"; + snps,parkmode-disable-ss-quirk; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; From dd2a996c7f6e43ab625ec6c0d8b13d84fbcd8a46 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 15 Jan 2024 15:46:41 +0000 Subject: [PATCH 04/86] sysctl: allow change system v ipc sysctls inside ipc namespace [ Upstream commit 50ec499b9a43e46200c9f7b7d723ab2e4af540b3 ] Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3. Right now ipc and mq limits count as per ipc namespace, but only real root can change them. By default, the current values of these limits are such that it can only be reduced. Since only root can change the values, it is impossible to reduce these limits in the rootless container. We can allow limit changes within ipc namespace because mq parameters are limited by RLIMIT_MSGQUEUE and ipc parameters are not limited to anything other than cgroups. This patch (of 3): Rootless containers are not allowed to modify kernel IPC parameters. All default limits are set to such high values that in fact there are no limits at all. All limits are not inherited and are initialized to default values when a new ipc_namespace is created. For new ipc_namespace: size_t ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24)) size_t ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24)) int ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15) int ipc_ns.shm_rmid_forced = 0; unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192 unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000 unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384 The shm_tot (total amount of shared pages) has also ceased to be global, it is located in ipc_namespace and is not inherited from anywhere. In such conditions, it cannot be said that these limits limit anything. The real limiter for them is cgroups. If we allow rootless containers to change these parameters, then it can only be reduced. Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org Signed-off-by: Alexey Gladkov Signed-off-by: Eric W. Biederman Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org Cc: Christian Brauner Cc: Joel Granados Cc: Kees Cook Cc: Luis Chamberlain Cc: Manfred Spraul Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") Signed-off-by: Sasha Levin --- ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index ef313ecfb53a..29c1d3ae2a5c 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "util.h" static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, @@ -190,25 +191,57 @@ static int set_is_seen(struct ctl_table_set *set) return ¤t->nsproxy->ipc_ns->ipc_set == set; } +static void ipc_set_ownership(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid) +{ + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, ipc_set); + + kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); + kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); + + *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; + *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID; +} + static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) { int mode = table->mode; #ifdef CONFIG_CHECKPOINT_RESTORE - struct ipc_namespace *ns = current->nsproxy->ipc_ns; + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, ipc_set); if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || (table->data == &ns->ids[IPC_MSG_IDS].next_id) || (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && checkpoint_restore_ns_capable(ns->user_ns)) mode = 0666; + else #endif - return mode; + { + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; + + else if (in_egroup_p(ns_root_gid)) + mode >>= 3; + } + + mode &= 7; + + return (mode << 6) | (mode << 3) | mode; } static struct ctl_table_root set_root = { .lookup = set_lookup, .permissions = ipc_permissions, + .set_ownership = ipc_set_ownership, }; bool setup_ipc_sysctls(struct ipc_namespace *ns) From c0d538bd5bd11fda70887e970551e85ed76a851a Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 15 Jan 2024 15:46:43 +0000 Subject: [PATCH 05/86] sysctl: allow to change limits for posix messages queues [ Upstream commit f9436a5d0497f759330d07e1189565edd4456be8 ] All parameters of posix messages queues (queues_max/msg_max/msgsize_max) end up being limited by RLIMIT_MSGQUEUE. The code in mqueue_get_inode is where that limiting happens. The RLIMIT_MSGQUEUE is bound to the user namespace and is counted hierarchically. We can allow root in the user namespace to modify the posix messages queues parameters. Link: https://lkml.kernel.org/r/6ad67f23d1459a4f4339f74aa73bac0ecf3995e1.1705333426.git.legion@kernel.org Signed-off-by: Alexey Gladkov Signed-off-by: Eric W. Biederman Link: https://lkml.kernel.org/r/7eb21211c8622e91d226e63416b1b93c079f60ee.1663756794.git.legion@kernel.org Cc: Christian Brauner Cc: Davidlohr Bueso Cc: Joel Granados Cc: Kees Cook Cc: Luis Chamberlain Cc: Manfred Spraul Signed-off-by: Andrew Morton Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") Signed-off-by: Sasha Levin --- ipc/mq_sysctl.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index fbf6a8b93a26..ce03930aced5 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -12,6 +12,7 @@ #include #include #include +#include static int msg_max_limit_min = MIN_MSGMAX; static int msg_max_limit_max = HARD_MSGMAX; @@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set) return ¤t->nsproxy->ipc_ns->mq_set == set; } +static void mq_set_ownership(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid) +{ + struct ipc_namespace *ns = + container_of(head->set, struct ipc_namespace, mq_set); + + kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); + kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); + + *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; + *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID; +} + +static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table) +{ + int mode = table->mode; + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; + + else if (in_egroup_p(ns_root_gid)) + mode >>= 3; + + mode &= 7; + + return (mode << 6) | (mode << 3) | mode; +} + static struct ctl_table_root set_root = { .lookup = set_lookup, + .permissions = mq_permissions, + .set_ownership = mq_set_ownership, }; bool setup_mq_sysctls(struct ipc_namespace *ns) From cf3a73eeb59bbc953cdbca1ba4684fd05e370509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 15 Mar 2024 19:11:30 +0100 Subject: [PATCH 06/86] sysctl: treewide: drop unused argument ctl_table_root::set_ownership(table) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 520713a93d550406dae14d49cdb8778d70cecdfd ] Remove the 'table' argument from set_ownership as it is never used. This change is a step towards putting "struct ctl_table" into .rodata and eventually having sysctl core only use "const struct ctl_table". The patch was created with the following coccinelle script: @@ identifier func, head, table, uid, gid; @@ void func( struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid) { ... } No additional occurrences of 'set_ownership' were found after doing a tree-wide search. Reviewed-by: Joel Granados Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") Signed-off-by: Sasha Levin --- fs/proc/proc_sysctl.c | 2 +- include/linux/sysctl.h | 1 - ipc/ipc_sysctl.c | 3 +-- ipc/mq_sysctl.c | 3 +-- net/sysctl_net.c | 1 - 5 files changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 4a4c04a3b1a0..c468cc0f6d69 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -484,7 +484,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, } if (root->set_ownership) - root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); + root->set_ownership(head, &inode->i_uid, &inode->i_gid); else { inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a207c7ed41bd..9f24feb94b24 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -185,7 +185,6 @@ struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root); void (*set_ownership)(struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 29c1d3ae2a5c..d7ca2bdae9e8 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -192,7 +192,6 @@ static int set_is_seen(struct ctl_table_set *set) } static void ipc_set_ownership(struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid) { struct ipc_namespace *ns = @@ -224,7 +223,7 @@ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *tabl kuid_t ns_root_uid; kgid_t ns_root_gid; - ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); + ipc_set_ownership(head, &ns_root_uid, &ns_root_gid); if (uid_eq(current_euid(), ns_root_uid)) mode >>= 6; diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index ce03930aced5..c960691fc24d 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -78,7 +78,6 @@ static int set_is_seen(struct ctl_table_set *set) } static void mq_set_ownership(struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid) { struct ipc_namespace *ns = @@ -97,7 +96,7 @@ static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table kuid_t ns_root_uid; kgid_t ns_root_gid; - mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); + mq_set_ownership(head, &ns_root_uid, &ns_root_gid); if (uid_eq(current_euid(), ns_root_uid)) mode >>= 6; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 4b45ed631eb8..2edb8040eb6c 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head, } static void net_ctl_set_ownership(struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid) { struct net *net = container_of(head->set, struct net, sysctls); From 1deae34db9f4f8e0e03f891be2e2e15c15c8ac05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 2 Apr 2024 23:10:34 +0200 Subject: [PATCH 07/86] sysctl: always initialize i_uid/i_gid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 98ca62ba9e2be5863c7d069f84f7166b45a5b2f4 ] Always initialize i_uid/i_gid inside the sysfs core so set_ownership() can safely skip setting them. Commit 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of i_uid/i_gid on /proc/sys inodes.") added defaults for i_uid/i_gid when set_ownership() was not implemented. It also missed adjusting net_ctl_set_ownership() to use the same default values in case the computation of a better value failed. Fixes: 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of i_uid/i_gid on /proc/sys inodes.") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados Signed-off-by: Sasha Levin --- fs/proc/proc_sysctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c468cc0f6d69..df77a7bcce49 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -483,12 +483,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, make_empty_dir_inode(inode); } + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; if (root->set_ownership) root->set_ownership(head, &inode->i_uid, &inode->i_gid); - else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } return inode; } From d5bebdaa0fd236c636b9991198a24bfe87a8e45f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 24 Apr 2023 11:38:45 +0800 Subject: [PATCH 08/86] ext4: make ext4_es_insert_extent() return void [ Upstream commit 6c120399cde6b1b5cf65ce403765c579fb3d3e50 ] Now ext4_es_insert_extent() never return error, so make it return void. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230424033846.4732-12-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") Signed-off-by: Sasha Levin --- fs/ext4/extents.c | 5 +++-- fs/ext4/extents_status.c | 14 ++++++-------- fs/ext4/extents_status.h | 6 +++--- fs/ext4/inode.c | 21 ++++++--------------- 4 files changed, 18 insertions(+), 28 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 67af684e44e6..5cbe5ae5ad4a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3113,8 +3113,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) if (ee_len == 0) return 0; - return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, - EXTENT_STATUS_WRITTEN); + ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, + EXTENT_STATUS_WRITTEN); + return 0; } /* FIXME!! we need to try to merge to left or right after zero-out */ diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 9766d3b21ca2..592229027af7 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -847,12 +847,10 @@ out: /* * ext4_es_insert_extent() adds information to an inode's extent * status tree. - * - * Return 0 on success, error code on failure. */ -int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status) +void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status) { struct extent_status newes; ext4_lblk_t end = lblk + len - 1; @@ -864,13 +862,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, bool revise_pending = false; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - return 0; + return; es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", lblk, len, pblk, status, inode->i_ino); if (!len) - return 0; + return; BUG_ON(end < lblk); @@ -939,7 +937,7 @@ error: goto retry; ext4_es_print_tree(inode); - return 0; + return; } /* diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 4ec30a798260..481ec4381bee 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -127,9 +127,9 @@ extern int __init ext4_init_es(void); extern void ext4_exit_es(void); extern void ext4_es_init_tree(struct ext4_es_tree *tree); -extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status); +extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status); extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk, unsigned int status); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2479508deab3..6dc15ad45ac9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -595,10 +595,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; - ret = ext4_es_insert_extent(inode, map->m_lblk, - map->m_len, map->m_pblk, status); - if (ret < 0) - retval = ret; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); } up_read((&EXT4_I(inode)->i_data_sem)); @@ -707,12 +705,8 @@ found: ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; - ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status); - if (ret < 0) { - retval = ret; - goto out_sem; - } + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); } out_sem: @@ -1812,7 +1806,6 @@ add_delayed: set_buffer_new(bh); set_buffer_delay(bh); } else if (retval > 0) { - int ret; unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1825,10 +1818,8 @@ add_delayed: status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; - ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status); - if (ret != 0) - retval = ret; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); } out_unlock: From 67e16594dc75225435a9962d742400b59befa8c5 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:00 +0800 Subject: [PATCH 09/86] ext4: refactor ext4_da_map_blocks() [ Upstream commit 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 ] Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary parameters and branches, no logic changes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6dc15ad45ac9..eab9aefe96ce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1741,7 +1741,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { if (ext4_es_is_hole(&es)) { - retval = 0; down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1786,26 +1785,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - -add_delayed: - if (retval == 0) { - int ret; - - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ - - ret = ext4_insert_delayed_block(inode, map->m_lblk); - if (ret != 0) { - retval = ret; - goto out_unlock; - } - - map_bh(bh, inode->i_sb, invalid_block); - set_buffer_new(bh); - set_buffer_delay(bh); - } else if (retval > 0) { + if (retval < 0) + goto out_unlock; + if (retval > 0) { unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1820,11 +1802,24 @@ add_delayed: EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); + goto out_unlock; } +add_delayed: + /* + * XXX: __block_prepare_write() unmaps passed block, + * is it OK? + */ + retval = ext4_insert_delayed_block(inode, map->m_lblk); + if (retval) + goto out_unlock; + + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); + out_unlock: up_read((&EXT4_I(inode)->i_data_sem)); - return retval; } From 00fb5e17b41f8dae8058bc11f1c46b3cea65ccad Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:01 +0800 Subject: [PATCH 10/86] ext4: convert to exclusive lock while inserting delalloc extents [ Upstream commit acf795dc161f3cf481db20f05db4250714e375e5 ] ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem when inserting delalloc extents, it could be raced by another querying path of ext4_map_blocks() without i_rwsem, .e.g buffered read path. Suppose we buffered read a file containing just a hole, and without any cached extents tree, then it is raced by another delayed buffered write to the same area or the near area belongs to the same hole, and the new delalloc extent could be overwritten to a hole extent. pread() pwrite() filemap_read_folio() ext4_mpage_readpages() ext4_map_blocks() down_read(i_data_sem) ext4_ext_determine_hole() //find hole ext4_ext_put_gap_in_cache() ext4_es_find_extent_range() //no delalloc extent ext4_da_map_blocks() down_read(i_data_sem) ext4_insert_delayed_block() //insert delalloc extent ext4_es_insert_extent() //overwrite delalloc extent to hole This race could lead to inconsistent delalloc extents tree and incorrect reserved space counter. Fix this by converting to hold i_data_sem in exclusive mode when adding a new delalloc extent in ext4_da_map_blocks(). Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eab9aefe96ce..d6f7525a796c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1740,10 +1740,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1785,8 +1783,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - if (retval < 0) - goto out_unlock; + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } if (retval > 0) { unsigned int status; @@ -1802,24 +1802,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); - goto out_unlock; + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); add_delayed: - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ + down_write(&EXT4_I(inode)->i_data_sem); retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); if (retval) - goto out_unlock; + return retval; map_bh(bh, inode->i_sb, invalid_block); set_buffer_new(bh); set_buffer_delay(bh); - -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); return retval; } From 3f90e9c3880d1ddd96ae0e353005c0fadea16231 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 17 May 2024 20:39:56 +0800 Subject: [PATCH 11/86] ext4: factor out a common helper to query extent map [ Upstream commit 8e4e5cdf2fdeb99445a468b6b6436ad79b9ecb30 ] Factor out a new common helper ext4_map_query_blocks() from the ext4_da_map_blocks(), it query and return the extent map status on the inode's extent path, no logic changes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://patch.msgid.link/20240517124005.347221-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 57 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6f7525a796c..a0c6a173c14d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -481,6 +481,35 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, } #endif /* ES_AGGRESSIVE_TEST */ +static int ext4_map_query_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map) +{ + unsigned int status; + int retval; + + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + retval = ext4_ext_map_blocks(handle, inode, map, 0); + else + retval = ext4_ind_map_blocks(handle, inode, map, 0); + + if (retval <= 0) + return retval; + + if (unlikely(retval != map->m_len)) { + ext4_warning(inode->i_sb, + "ES len assertion failed for inode " + "%lu: retval %d != map->m_len %d", + inode->i_ino, retval, map->m_len); + WARN_ON(1); + } + + status = map->m_flags & EXT4_MAP_UNWRITTEN ? + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); + return retval; +} + /* * The ext4_map_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. @@ -1779,33 +1808,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, down_read(&EXT4_I(inode)->i_data_sem); if (ext4_has_inline_data(inode)) retval = 0; - else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - retval = ext4_ext_map_blocks(NULL, inode, map, 0); else - retval = ext4_ind_map_blocks(NULL, inode, map, 0); - if (retval < 0) { - up_read(&EXT4_I(inode)->i_data_sem); - return retval; - } - if (retval > 0) { - unsigned int status; - - if (unlikely(retval != map->m_len)) { - ext4_warning(inode->i_sb, - "ES len assertion failed for inode " - "%lu: retval %d != map->m_len %d", - inode->i_ino, retval, map->m_len); - WARN_ON(1); - } - - status = map->m_flags & EXT4_MAP_UNWRITTEN ? - EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; - ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status); - up_read(&EXT4_I(inode)->i_data_sem); - return retval; - } + retval = ext4_map_query_blocks(NULL, inode, map); up_read(&EXT4_I(inode)->i_data_sem); + if (retval) + return retval; add_delayed: down_write(&EXT4_I(inode)->i_data_sem); From e99c372243ffaa3cc0e32dce1b6accd9e3d54825 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 17 May 2024 20:39:57 +0800 Subject: [PATCH 12/86] ext4: check the extent status again before inserting delalloc block [ Upstream commit 0ea6560abb3bac1ffcfa4bf6b2c4d344fdc27b3c ] ext4_da_map_blocks looks up for any extent entry in the extent status tree (w/o i_data_sem) and then the looks up for any ondisk extent mapping (with i_data_sem in read mode). If it finds a hole in the extent status tree or if it couldn't find any entry at all, it then takes the i_data_sem in write mode to add a da entry into the extent status tree. This can actually race with page mkwrite & fallocate path. Note that this is ok between 1. ext4 buffered-write path v/s ext4_page_mkwrite(), because of the folio lock 2. ext4 buffered write path v/s ext4 fallocate because of the inode lock. But this can race between ext4_page_mkwrite() & ext4 fallocate path ext4_page_mkwrite() ext4_fallocate() block_page_mkwrite() ext4_da_map_blocks() //find hole in extent status tree ext4_alloc_file_blocks() ext4_map_blocks() //allocate block and unwritten extent ext4_insert_delayed_block() ext4_da_reserve_space() //reserve one more block ext4_es_insert_delayed_block() //drop unwritten extent and add delayed extent by mistake Then, the delalloc extent is wrong until writeback and the extra reserved block can't be released any more and it triggers below warning: EXT4-fs (pmem2): Inode 13 (00000000bbbd4d23): i_reserved_data_blocks(1) not cleared! Fix the problem by looking up extent status tree again while the i_data_sem is held in write mode. If it still can't find any entry, then we insert a new da entry into the extent status tree. Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240517124005.347221-3-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a0c6a173c14d..93a1c22048de 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1772,6 +1772,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, if (ext4_es_is_hole(&es)) goto add_delayed; +found: /* * Delayed extent could be allocated by fallocate. * So we need to check it. @@ -1816,6 +1817,26 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, add_delayed: down_write(&EXT4_I(inode)->i_data_sem); + /* + * Page fault path (ext4_page_mkwrite does not take i_rwsem) + * and fallocate path (no folio lock) can race. Make sure we + * lookup the extent status tree here again while i_data_sem + * is held in write mode, before inserting a new da entry in + * the extent status tree. + */ + if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { + if (!ext4_es_is_hole(&es)) { + up_write(&EXT4_I(inode)->i_data_sem); + goto found; + } + } else if (!ext4_has_inline_data(inode)) { + retval = ext4_map_query_blocks(NULL, inode, map); + if (retval) { + up_write(&EXT4_I(inode)->i_data_sem); + return retval; + } + } + retval = ext4_insert_delayed_block(inode, map->m_lblk); up_write(&EXT4_I(inode)->i_data_sem); if (retval) From 53ce6578cd7db4c172f1411f91a791db99baa529 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jul 2023 17:33:18 +0800 Subject: [PATCH 13/86] cpufreq: qcom-nvmem: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 402732324b17a31f7e5dce9659d1b1f049fd65d3 ] The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Cc: Uwe Kleine-König Signed-off-by: Yangtao Li Signed-off-by: Viresh Kumar Stable-dep-of: d01c84b97f19 ("cpufreq: qcom-nvmem: fix memory leaks in probe error paths") Signed-off-by: Sasha Levin --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index cb03bfb0435e..91634b84baa8 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -374,7 +374,7 @@ free_drv: return ret; } -static int qcom_cpufreq_remove(struct platform_device *pdev) +static void qcom_cpufreq_remove(struct platform_device *pdev) { struct qcom_cpufreq_drv *drv = platform_get_drvdata(pdev); unsigned int cpu; @@ -386,13 +386,11 @@ static int qcom_cpufreq_remove(struct platform_device *pdev) kfree(drv->opp_tokens); kfree(drv); - - return 0; } static struct platform_driver qcom_cpufreq_driver = { .probe = qcom_cpufreq_probe, - .remove = qcom_cpufreq_remove, + .remove_new = qcom_cpufreq_remove, .driver = { .name = "qcom-cpufreq-nvmem", }, From f5bbfc12b0d93c3d9e9e6e219f814f8e4411424a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 18 Oct 2023 10:06:02 +0200 Subject: [PATCH 14/86] cpufreq: qcom-nvmem: Simplify driver data allocation [ Upstream commit 2a5d46c3ad6b0e62d2b04356ad999d504fb564e0 ] Simplify the allocation and cleanup of driver data by using devm together with a flexible array. Prepare for adding additional per-CPU data by defining a struct qcom_cpufreq_drv_cpu instead of storing the opp_tokens directly. Signed-off-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Signed-off-by: Viresh Kumar Stable-dep-of: d01c84b97f19 ("cpufreq: qcom-nvmem: fix memory leaks in probe error paths") Signed-off-by: Sasha Levin --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 49 ++++++++++------------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 91634b84baa8..983991c0afd5 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -53,10 +53,14 @@ struct qcom_cpufreq_match_data { const char **genpd_names; }; +struct qcom_cpufreq_drv_cpu { + int opp_token; +}; + struct qcom_cpufreq_drv { - int *opp_tokens; u32 versions; const struct qcom_cpufreq_match_data *data; + struct qcom_cpufreq_drv_cpu cpus[]; }; static struct platform_device *cpufreq_dt_pdev, *cpufreq_pdev; @@ -284,42 +288,32 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) return -ENOENT; } - drv = kzalloc(sizeof(*drv), GFP_KERNEL); + drv = devm_kzalloc(&pdev->dev, struct_size(drv, cpus, num_possible_cpus()), + GFP_KERNEL); if (!drv) return -ENOMEM; match = pdev->dev.platform_data; drv->data = match->data; - if (!drv->data) { - ret = -ENODEV; - goto free_drv; - } + if (!drv->data) + return -ENODEV; if (drv->data->get_version) { speedbin_nvmem = of_nvmem_cell_get(np, NULL); - if (IS_ERR(speedbin_nvmem)) { - ret = dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), - "Could not get nvmem cell\n"); - goto free_drv; - } + if (IS_ERR(speedbin_nvmem)) + return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), + "Could not get nvmem cell\n"); ret = drv->data->get_version(cpu_dev, speedbin_nvmem, &pvs_name, drv); if (ret) { nvmem_cell_put(speedbin_nvmem); - goto free_drv; + return ret; } nvmem_cell_put(speedbin_nvmem); } of_node_put(np); - drv->opp_tokens = kcalloc(num_possible_cpus(), sizeof(*drv->opp_tokens), - GFP_KERNEL); - if (!drv->opp_tokens) { - ret = -ENOMEM; - goto free_drv; - } - for_each_possible_cpu(cpu) { struct dev_pm_opp_config config = { .supported_hw = NULL, @@ -345,9 +339,9 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) } if (config.supported_hw || config.genpd_names) { - drv->opp_tokens[cpu] = dev_pm_opp_set_config(cpu_dev, &config); - if (drv->opp_tokens[cpu] < 0) { - ret = drv->opp_tokens[cpu]; + drv->cpus[cpu].opp_token = dev_pm_opp_set_config(cpu_dev, &config); + if (drv->cpus[cpu].opp_token < 0) { + ret = drv->cpus[cpu].opp_token; dev_err(cpu_dev, "Failed to set OPP config\n"); goto free_opp; } @@ -366,11 +360,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) free_opp: for_each_possible_cpu(cpu) - dev_pm_opp_clear_config(drv->opp_tokens[cpu]); - kfree(drv->opp_tokens); -free_drv: - kfree(drv); - + dev_pm_opp_clear_config(drv->cpus[cpu].opp_token); return ret; } @@ -382,10 +372,7 @@ static void qcom_cpufreq_remove(struct platform_device *pdev) platform_device_unregister(cpufreq_dt_pdev); for_each_possible_cpu(cpu) - dev_pm_opp_clear_config(drv->opp_tokens[cpu]); - - kfree(drv->opp_tokens); - kfree(drv); + dev_pm_opp_clear_config(drv->cpus[cpu].opp_token); } static struct platform_driver qcom_cpufreq_driver = { From 7cde123b32c8b70caf6ae292769864fb0bc51bde Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Thu, 23 May 2024 23:24:59 +0200 Subject: [PATCH 15/86] cpufreq: qcom-nvmem: fix memory leaks in probe error paths [ Upstream commit d01c84b97f19f1137211e90b0a910289a560019e ] The code refactoring added new error paths between the np device node allocation and the call to of_node_put(), which leads to memory leaks if any of those errors occur. Add the missing of_node_put() in the error paths that require it. Cc: stable@vger.kernel.org Fixes: 57f2f8b4aa0c ("cpufreq: qcom: Refactor the driver to make it easier to extend") Signed-off-by: Javier Carrasco Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 983991c0afd5..2edb66097cdb 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -290,23 +290,30 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) drv = devm_kzalloc(&pdev->dev, struct_size(drv, cpus, num_possible_cpus()), GFP_KERNEL); - if (!drv) + if (!drv) { + of_node_put(np); return -ENOMEM; + } match = pdev->dev.platform_data; drv->data = match->data; - if (!drv->data) + if (!drv->data) { + of_node_put(np); return -ENODEV; + } if (drv->data->get_version) { speedbin_nvmem = of_nvmem_cell_get(np, NULL); - if (IS_ERR(speedbin_nvmem)) + if (IS_ERR(speedbin_nvmem)) { + of_node_put(np); return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), "Could not get nvmem cell\n"); + } ret = drv->data->get_version(cpu_dev, speedbin_nvmem, &pvs_name, drv); if (ret) { + of_node_put(np); nvmem_cell_put(speedbin_nvmem); return ret; } From b4e147d3f1fe835c9d9334c7eab83c21ff2d0149 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 8 Dec 2023 23:56:41 +0100 Subject: [PATCH 16/86] leds: trigger: Remove unused function led_trigger_rename_static() [ Upstream commit c82a1662d4548c454de5343b88f69b9fc82266b3 ] This function was added with a8df7b1ab70b ("leds: add led_trigger_rename function") 11 yrs ago, but it has no users. So remove it. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/d90f30be-f661-4db7-b0b5-d09d07a78a68@gmail.com Signed-off-by: Lee Jones Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") Signed-off-by: Sasha Levin --- drivers/leds/led-triggers.c | 13 ------------- include/linux/leds.h | 17 ----------------- 2 files changed, 30 deletions(-) diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 024b73f84ce0..dddfc301d341 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -268,19 +268,6 @@ void led_trigger_set_default(struct led_classdev *led_cdev) } EXPORT_SYMBOL_GPL(led_trigger_set_default); -void led_trigger_rename_static(const char *name, struct led_trigger *trig) -{ - /* new name must be on a temporary string to prevent races */ - BUG_ON(name == trig->name); - - down_write(&triggers_list_lock); - /* this assumes that trig->name was originaly allocated to - * non constant storage */ - strcpy((char *)trig->name, name); - up_write(&triggers_list_lock); -} -EXPORT_SYMBOL_GPL(led_trigger_rename_static); - /* LED Trigger Interface */ int led_trigger_register(struct led_trigger *trig) diff --git a/include/linux/leds.h b/include/linux/leds.h index ba4861ec73d3..2bbff7519b73 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -409,23 +409,6 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return led_cdev->trigger_data; } -/** - * led_trigger_rename_static - rename a trigger - * @name: the new trigger name - * @trig: the LED trigger to rename - * - * Change a LED trigger name by copying the string passed in - * name into current trigger name, which MUST be large - * enough for the new string. - * - * Note that name must NOT point to the same string used - * during LED registration, as that could lead to races. - * - * This is meant to be used on triggers with statically - * allocated name. - */ -void led_trigger_rename_static(const char *name, struct led_trigger *trig); - #define module_led_trigger(__led_trigger) \ module_driver(__led_trigger, led_trigger_register, \ led_trigger_unregister) From 2bc78ff25fca21b7899b14d43655cec2ea24e22f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 4 Mar 2024 21:57:30 +0100 Subject: [PATCH 17/86] leds: trigger: Store brightness set by led_trigger_event() [ Upstream commit 822c91e72eac568ed8d83765634f00decb45666c ] If a simple trigger is assigned to a LED, then the LED may be off until the next led_trigger_event() call. This may be an issue for simple triggers with rare led_trigger_event() calls, e.g. power supply charging indicators (drivers/power/supply/power_supply_leds.c). Therefore persist the brightness value of the last led_trigger_event() call and use this value if the trigger is assigned to a LED. In addition add a getter for the trigger brightness value. Signed-off-by: Heiner Kallweit Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/b1358b25-3f30-458d-8240-5705ae007a8a@gmail.com Signed-off-by: Lee Jones Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") Signed-off-by: Sasha Levin --- drivers/leds/led-triggers.c | 6 ++++-- include/linux/leds.h | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index dddfc301d341..cdb446cb84af 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -193,11 +193,11 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) spin_unlock(&trig->leddev_list_lock); led_cdev->trigger = trig; + ret = 0; if (trig->activate) ret = trig->activate(led_cdev); else - ret = 0; - + led_set_brightness(led_cdev, trig->brightness); if (ret) goto err_activate; @@ -372,6 +372,8 @@ void led_trigger_event(struct led_trigger *trig, if (!trig) return; + trig->brightness = brightness; + rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) led_set_brightness(led_cdev, brightness); diff --git a/include/linux/leds.h b/include/linux/leds.h index 2bbff7519b73..79ab2dfd3c72 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -356,6 +356,9 @@ struct led_trigger { int (*activate)(struct led_classdev *led_cdev); void (*deactivate)(struct led_classdev *led_cdev); + /* Brightness set by led_trigger_event */ + enum led_brightness brightness; + /* LED-private triggers have this set */ struct led_hw_trigger_type *trigger_type; @@ -409,6 +412,12 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return led_cdev->trigger_data; } +static inline enum led_brightness +led_trigger_get_brightness(const struct led_trigger *trigger) +{ + return trigger ? trigger->brightness : LED_OFF; +} + #define module_led_trigger(__led_trigger) \ module_driver(__led_trigger, led_trigger_register, \ led_trigger_unregister) @@ -445,6 +454,12 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return NULL; } +static inline enum led_brightness +led_trigger_get_brightness(const struct led_trigger *trigger) +{ + return LED_OFF; +} + #endif /* CONFIG_LEDS_TRIGGERS */ /* Trigger specific functions */ From c3f8e2ec3c7b7d77c6dc7f288b3555fa9f6a4f24 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 14:01:24 +0200 Subject: [PATCH 18/86] leds: trigger: Call synchronize_rcu() before calling trig->activate() [ Upstream commit b1bbd20f35e19774ea01989320495e09ac44fba3 ] Some triggers call led_trigger_event() from their activate() callback to initialize the brightness of the LED for which the trigger is being activated. In order for the LED's initial state to be set correctly this requires that the led_trigger_event() call uses the new version of trigger->led_cdevs, which has the new LED. AFAICT led_trigger_event() will always use the new version when it is running on the same CPU as where the list_add_tail_rcu() call was made, which is why the missing synchronize_rcu() has not lead to bug reports. But if activate() is pre-empted, sleeps or uses a worker then the led_trigger_event() call may run on another CPU which may still use the old trigger->led_cdevs list. Add a synchronize_rcu() call to ensure that any led_trigger_event() calls done from activate() always use the new list. Triggers using led_trigger_event() from their activate() callback are: net/bluetooth/leds.c, net/rfkill/core.c and drivers/tty/vt/keyboard.c. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240531120124.75662-1-hdegoede@redhat.com Signed-off-by: Lee Jones Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") Signed-off-by: Sasha Levin --- drivers/leds/led-triggers.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index cdb446cb84af..fe7fb2e7149c 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -193,6 +193,13 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) spin_unlock(&trig->leddev_list_lock); led_cdev->trigger = trig; + /* + * Some activate() calls use led_trigger_event() to initialize + * the brightness of the LED for which the trigger is being set. + * Ensure the led_cdev is visible on trig->led_cdevs for this. + */ + synchronize_rcu(); + ret = 0; if (trig->activate) ret = trig->activate(led_cdev); From 7118f979163db8066aa8411eb187d4cbebec60ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 13 Jun 2024 17:24:51 +0200 Subject: [PATCH 19/86] leds: triggers: Flush pending brightness before activating trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ab477b766edd3bfb6321a6e3df4c790612613fae ] The race fixed in timer_trig_activate() between a blocking set_brightness() call and trigger->activate() can affect any trigger. So move the call to flush_work() into led_trigger_set() where it can avoid the race for all triggers. Fixes: 0db37915d912 ("leds: avoid races with workqueue") Fixes: 8c0f693c6eff ("leds: avoid flush_work in atomic context") Cc: stable@vger.kernel.org Tested-by: Dustin L. Howett Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240613-led-trigger-flush-v2-1-f4f970799d77@weissschuh.net Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/led-triggers.c | 6 ++++++ drivers/leds/trigger/ledtrig-timer.c | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index fe7fb2e7149c..3d3673c197e3 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -200,6 +200,12 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) */ synchronize_rcu(); + /* + * If "set brightness to 0" is pending in workqueue, + * we don't want that to be reordered after ->activate() + */ + flush_work(&led_cdev->set_brightness_work); + ret = 0; if (trig->activate) ret = trig->activate(led_cdev); diff --git a/drivers/leds/trigger/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c index b4688d1d9d2b..1d213c999d40 100644 --- a/drivers/leds/trigger/ledtrig-timer.c +++ b/drivers/leds/trigger/ledtrig-timer.c @@ -110,11 +110,6 @@ static int timer_trig_activate(struct led_classdev *led_cdev) led_cdev->flags &= ~LED_INIT_DEFAULT_TRIGGER; } - /* - * If "set brightness to 0" is pending in workqueue, we don't - * want that to be reordered after blink_set() - */ - flush_work(&led_cdev->set_brightness_work); led_blink_set(led_cdev, &led_cdev->blink_delay_on, &led_cdev->blink_delay_off); From 5eb41c3bf19aedc1eb90383de6e3a06f8842aaf9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 16 Oct 2023 13:29:57 +0800 Subject: [PATCH 20/86] mm: restrict the pcp batch scale factor to avoid too long latency [ Upstream commit 52166607ecc980391b1fffbce0be3074a96d0c7b ] In page allocator, PCP (Per-CPU Pageset) is refilled and drained in batches to increase page allocation throughput, reduce page allocation/freeing latency per page, and reduce zone lock contention. But too large batch size will cause too long maximal allocation/freeing latency, which may punish arbitrary users. So the default batch size is chosen carefully (in zone_batchsize(), the value is 63 for zone > 1GB) to avoid that. In commit 3b12e7e97938 ("mm/page_alloc: scale the number of pages that are batch freed"), the batch size will be scaled for large number of page freeing to improve page freeing performance and reduce zone lock contention. Similar optimization can be used for large number of pages allocation too. To find out a suitable max batch scale factor (that is, max effective batch size), some tests and measurement on some machines were done as follows. A set of debug patches are implemented as follows, - Set PCP high to be 2 * batch to reduce the effect of PCP high - Disable free batch size scaling to get the raw performance. - The code with zone lock held is extracted from rmqueue_bulk() and free_pcppages_bulk() to 2 separate functions to make it easy to measure the function run time with ftrace function_graph tracer. - The batch size is hard coded to be 63 (default), 127, 255, 511, 1023, 2047, 4095. Then will-it-scale/page_fault1 is used to generate the page allocation/freeing workload. The page allocation/freeing throughput (page/s) is measured via will-it-scale. The page allocation/freeing average latency (alloc/free latency avg, in us) and allocation/freeing latency at 99 percentile (alloc/free latency 99%, in us) are measured with ftrace function_graph tracer. The test results are as follows, Sapphire Rapids Server ====================== Batch throughput free latency free latency alloc latency alloc latency page/s avg / us 99% / us avg / us 99% / us ----- ---------- ------------ ------------ ------------- ------------- 63 513633.4 2.33 3.57 2.67 6.83 127 517616.7 4.35 6.65 4.22 13.03 255 520822.8 8.29 13.32 7.52 25.24 511 524122.0 15.79 23.42 14.02 49.35 1023 525980.5 30.25 44.19 25.36 94.88 2047 526793.6 59.39 84.50 45.22 140.81 Ice Lake Server =============== Batch throughput free latency free latency alloc latency alloc latency page/s avg / us 99% / us avg / us 99% / us ----- ---------- ------------ ------------ ------------- ------------- 63 620210.3 2.21 3.68 2.02 4.35 127 627003.0 4.09 6.86 3.51 8.28 255 630777.5 7.70 13.50 6.17 15.97 511 633651.5 14.85 22.62 11.66 31.08 1023 637071.1 28.55 42.02 20.81 54.36 2047 638089.7 56.54 84.06 39.28 91.68 Cascade Lake Server =================== Batch throughput free latency free latency alloc latency alloc latency page/s avg / us 99% / us avg / us 99% / us ----- ---------- ------------ ------------ ------------- ------------- 63 404706.7 3.29 5.03 3.53 4.75 127 422475.2 6.12 9.09 6.36 8.76 255 411522.2 11.68 16.97 10.90 16.39 511 428124.1 22.54 31.28 19.86 32.25 1023 414718.4 43.39 62.52 40.00 66.33 2047 429848.7 86.64 120.34 71.14 106.08 Commet Lake Desktop =================== Batch throughput free latency free latency alloc latency alloc latency page/s avg / us 99% / us avg / us 99% / us ----- ---------- ------------ ------------ ------------- ------------- 63 795183.13 2.18 3.55 2.03 3.05 127 803067.85 3.91 6.56 3.85 5.52 255 812771.10 7.35 10.80 7.14 10.20 511 817723.48 14.17 27.54 13.43 30.31 1023 818870.19 27.72 40.10 27.89 46.28 Coffee Lake Desktop =================== Batch throughput free latency free latency alloc latency alloc latency page/s avg / us 99% / us avg / us 99% / us ----- ---------- ------------ ------------ ------------- ------------- 63 510542.8 3.13 4.40 2.48 3.43 127 514288.6 5.97 7.89 4.65 6.04 255 516889.7 11.86 15.58 8.96 12.55 511 519802.4 23.10 28.81 16.95 26.19 1023 520802.7 45.30 52.51 33.19 45.95 2047 519997.1 90.63 104.00 65.26 81.74 From the above data, to restrict the allocation/freeing latency to be less than 100 us in most times, the max batch scale factor needs to be less than or equal to 5. Although it is reasonable to use 5 as max batch scale factor for the systems tested, there are also slower systems. Where smaller value should be used to constrain the page allocation/freeing latency. So, in this patch, a new kconfig option (PCP_BATCH_SCALE_MAX) is added to set the max batch scale factor. Whose default value is 5, and users can reduce it when necessary. Link: https://lkml.kernel.org/r/20231016053002.756205-5-ying.huang@intel.com Signed-off-by: "Huang, Ying" Acked-by: Andrew Morton Acked-by: Mel Gorman Cc: Vlastimil Babka Cc: David Hildenbrand Cc: Johannes Weiner Cc: Dave Hansen Cc: Michal Hocko Cc: Pavel Tatashin Cc: Matthew Wilcox Cc: Christoph Lameter Cc: Arjan van de Ven Cc: Sudeep Holla Signed-off-by: Andrew Morton Stable-dep-of: 66eca1021a42 ("mm/page_alloc: fix pcp->count race between drain_pages_zone() vs __rmqueue_pcplist()") Signed-off-by: Sasha Levin --- mm/Kconfig | 11 +++++++++++ mm/page_alloc.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 35109a4a2f7c..a65145fe89f2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -627,6 +627,17 @@ config HUGETLB_PAGE_SIZE_VARIABLE config CONTIG_ALLOC def_bool (MEMORY_ISOLATION && COMPACTION) || CMA +config PCP_BATCH_SCALE_MAX + int "Maximum scale factor of PCP (Per-CPU pageset) batch allocate/free" + default 5 + range 0 6 + help + In page allocator, PCP (Per-CPU pageset) is refilled and drained in + batches. The batch number is scaled automatically to improve page + allocation/free throughput. But too large scale factor may hurt + latency. This option sets the upper limit of scale factor to limit + the maximum latency. + config PHYS_ADDR_T_64BIT def_bool 64BIT diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 12412263d131..8eaf51257db5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3389,7 +3389,7 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch, * freeing of pages without any allocation. */ batch <<= pcp->free_factor; - if (batch < max_nr_free) + if (batch < max_nr_free && pcp->free_factor < CONFIG_PCP_BATCH_SCALE_MAX) pcp->free_factor++; batch = clamp(batch, min_nr_free, max_nr_free); From 0fd304a885c38425010206cae032febb4fb29f17 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 18 Mar 2024 21:07:36 +0100 Subject: [PATCH 21/86] mm: page_alloc: control latency caused by zone PCP draining [ Upstream commit 55f77df7d715110299f12c27f4365bd6332d1adb ] Patch series "mm/treewide: Remove pXd_huge() API", v2. In previous work [1], we removed the pXd_large() API, which is arch specific. This patchset further removes the hugetlb pXd_huge() API. Hugetlb was never special on creating huge mappings when compared with other huge mappings. Having a standalone API just to detect such pgtable entries is more or less redundant, especially after the pXd_leaf() API set is introduced with/without CONFIG_HUGETLB_PAGE. When looking at this problem, a few issues are also exposed that we don't have a clear definition of the *_huge() variance API. This patchset started by cleaning these issues first, then replace all *_huge() users to use *_leaf(), then drop all *_huge() code. On x86/sparc, swap entries will be reported "true" in pXd_huge(), while for all the rest archs they're reported "false" instead. This part is done in patch 1-5, in which I suspect patch 1 can be seen as a bug fix, but I'll leave that to hmm experts to decide. Besides, there are three archs (arm, arm64, powerpc) that have slightly different definitions between the *_huge() v.s. *_leaf() variances. I tackled them separately so that it'll be easier for arch experts to chim in when necessary. This part is done in patch 6-9. The final patches 10-14 do the rest on the final removal, since *_leaf() will be the ultimate API in the future, and we seem to have quite some confusions on how *_huge() APIs can be defined, provide a rich comment for *_leaf() API set to define them properly to avoid future misuse, and hopefully that'll also help new archs to start support huge mappings and avoid traps (like either swap entries, or PROT_NONE entry checks). [1] https://lore.kernel.org/r/20240305043750.93762-1-peterx@redhat.com This patch (of 14): When the complete PCP is drained a much larger number of pages than the usual batch size might be freed at once, causing large IRQ and preemption latency spikes, as they are all freed while holding the pcp and zone spinlocks. To avoid those latency spikes, limit the number of pages freed in a single bulk operation to common batch limits. Link: https://lkml.kernel.org/r/20240318200404.448346-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20240318200736.2835502-1-l.stach@pengutronix.de Signed-off-by: Lucas Stach Signed-off-by: Peter Xu Cc: Christophe Leroy Cc: Jason Gunthorpe Cc: "Matthew Wilcox (Oracle)" Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Fabio Estevam Cc: Ingo Molnar Cc: Konrad Dybcio Cc: Krzysztof Kozlowski Cc: Mark Salter Cc: Michael Ellerman Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Russell King Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Stable-dep-of: 66eca1021a42 ("mm/page_alloc: fix pcp->count race between drain_pages_zone() vs __rmqueue_pcplist()") Signed-off-by: Sasha Levin --- mm/page_alloc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8eaf51257db5..4029d13636ec 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3176,12 +3176,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) */ static void drain_pages_zone(unsigned int cpu, struct zone *zone) { - struct per_cpu_pages *pcp; + struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + int count = READ_ONCE(pcp->count); + + while (count) { + int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + count -= to_drain; - pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - if (pcp->count) { spin_lock(&pcp->lock); - free_pcppages_bulk(zone, pcp->count, pcp, 0); + free_pcppages_bulk(zone, to_drain, pcp, 0); spin_unlock(&pcp->lock); } } From e7a2799dcb454e494ed53dc27bdf9204e04ed5e8 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 23 Jul 2024 14:44:28 +0800 Subject: [PATCH 22/86] mm/page_alloc: fix pcp->count race between drain_pages_zone() vs __rmqueue_pcplist() [ Upstream commit 66eca1021a42856d6af2a9802c99e160278aed91 ] It's expected that no page should be left in pcp_list after calling zone_pcp_disable() in offline_pages(). Previously, it's observed that offline_pages() gets stuck [1] due to some pages remaining in pcp_list. Cause: There is a race condition between drain_pages_zone() and __rmqueue_pcplist() involving the pcp->count variable. See below scenario: CPU0 CPU1 ---------------- --------------- spin_lock(&pcp->lock); __rmqueue_pcplist() { zone_pcp_disable() { /* list is empty */ if (list_empty(list)) { /* add pages to pcp_list */ alloced = rmqueue_bulk() mutex_lock(&pcp_batch_high_lock) ... __drain_all_pages() { drain_pages_zone() { /* read pcp->count, it's 0 here */ count = READ_ONCE(pcp->count) /* 0 means nothing to drain */ /* update pcp->count */ pcp->count += alloced << order; ... ... spin_unlock(&pcp->lock); In this case, after calling zone_pcp_disable() though, there are still some pages in pcp_list. And these pages in pcp_list are neither movable nor isolated, offline_pages() gets stuck as a result. Solution: Expand the scope of the pcp->lock to also protect pcp->count in drain_pages_zone(), to ensure no pages are left in the pcp list after zone_pcp_disable() [1] https://lore.kernel.org/linux-mm/6a07125f-e720-404c-b2f9-e55f3f166e85@fujitsu.com/ Link: https://lkml.kernel.org/r/20240723064428.1179519-1-lizhijian@fujitsu.com Fixes: 4b23a68f9536 ("mm/page_alloc: protect PCP lists with a spinlock") Signed-off-by: Li Zhijian Reported-by: Yao Xingtao Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/page_alloc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4029d13636ec..a905b850d31c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3177,16 +3177,20 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) static void drain_pages_zone(unsigned int cpu, struct zone *zone) { struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - int count = READ_ONCE(pcp->count); - - while (count) { - int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); - count -= to_drain; + int count; + do { spin_lock(&pcp->lock); - free_pcppages_bulk(zone, to_drain, pcp, 0); + count = pcp->count; + if (count) { + int to_drain = min(count, + pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + + free_pcppages_bulk(zone, to_drain, pcp, 0); + count -= to_drain; + } spin_unlock(&pcp->lock); - } + } while (count); } /* From 216671e0c4abcfbdde434b07997c114f8bdd93cf Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 29 May 2024 17:47:00 +0800 Subject: [PATCH 23/86] f2fs: fix to avoid use SSR allocate when do defragment [ Upstream commit 21327a042dd94bc73181d7300e688699cb1f467e ] SSR allocate mode will be used when doing file defragment if ATGC is working at the same time, that is because set_page_private_gcing may make CURSEG_ALL_DATA_ATGC segment type got in f2fs_allocate_data_block when defragment page is writeback, which may cause file fragmentation is worse. A file with 2 fragmentations is changed as following after defragment: ----------------file info------------------- sensorsdata : -------------------------------------------- dev [254:48] ino [0x 3029 : 12329] mode [0x 81b0 : 33200] nlink [0x 1 : 1] uid [0x 27e6 : 10214] gid [0x 27e6 : 10214] size [0x 242000 : 2367488] blksize [0x 1000 : 4096] blocks [0x 1210 : 4624] -------------------------------------------- file_pos start_blk end_blk blks 0 11361121 11361207 87 356352 11361215 11361216 2 364544 11361218 11361218 1 368640 11361220 11361221 2 376832 11361224 11361225 2 385024 11361227 11361238 12 434176 11361240 11361252 13 487424 11361254 11361254 1 491520 11361271 11361279 9 528384 3681794 3681795 2 536576 3681797 3681797 1 540672 3681799 3681799 1 544768 3681803 3681803 1 548864 3681805 3681805 1 552960 3681807 3681807 1 557056 3681809 3681809 1 Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 8cb1f4080dd9 ("f2fs: assign CURSEG_ALL_DATA_ATGC if blkaddr is valid") Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e19b569d938d..99391ee4c28c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3185,7 +3185,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && - (fio->sbi->gc_mode != GC_URGENT_HIGH)) + (fio->sbi->gc_mode != GC_URGENT_HIGH) && + !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; From 5fd057160ab240dd816ae09b625395d54c297de1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Jun 2024 02:15:38 +0000 Subject: [PATCH 24/86] f2fs: assign CURSEG_ALL_DATA_ATGC if blkaddr is valid [ Upstream commit 8cb1f4080dd91c6e6b01dbea013a3f42341cb6a1 ] mkdir /mnt/test/comp f2fs_io setflags compression /mnt/test/comp dd if=/dev/zero of=/mnt/test/comp/testfile bs=16k count=1 truncate --size 13 /mnt/test/comp/testfile In the above scenario, we can get a BUG_ON. kernel BUG at fs/f2fs/segment.c:3589! Call Trace: do_write_page+0x78/0x390 [f2fs] f2fs_outplace_write_data+0x62/0xb0 [f2fs] f2fs_do_write_data_page+0x275/0x740 [f2fs] f2fs_write_single_data_page+0x1dc/0x8f0 [f2fs] f2fs_write_multi_pages+0x1e5/0xae0 [f2fs] f2fs_write_cache_pages+0xab1/0xc60 [f2fs] f2fs_write_data_pages+0x2d8/0x330 [f2fs] do_writepages+0xcf/0x270 __writeback_single_inode+0x44/0x350 writeback_sb_inodes+0x242/0x530 __writeback_inodes_wb+0x54/0xf0 wb_writeback+0x192/0x310 wb_workfn+0x30d/0x400 The reason is we gave CURSEG_ALL_DATA_ATGC to COMPR_ADDR where the page was set the gcing flag by set_cluster_dirty(). Cc: stable@vger.kernel.org Fixes: 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration") Reviewed-by: Chao Yu Tested-by: Will McVicker Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 99391ee4c28c..1264a350d4d7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3186,6 +3186,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && (fio->sbi->gc_mode != GC_URGENT_HIGH) && + __is_valid_data_blkaddr(fio->old_blkaddr) && !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else From 7e372c7c432f810899777947a39c068c51507034 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:04 +0200 Subject: [PATCH 25/86] irqdomain: Fixed unbalanced fwnode get and put [ Upstream commit 6ce3e98184b625d2870991880bf9586ded7ea7f9 ] fwnode_handle_get(fwnode) is called when a domain is created with fwnode passed as a function parameter. fwnode_handle_put(domain->fwnode) is called when the domain is destroyed but during the creation a path exists that does not set domain->fwnode. If this path is taken, the fwnode get will never be put. To avoid the unbalanced get and put, set domain->fwnode unconditionally. Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only") Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240614173232.1184015-4-herve.codina@bootlin.com Signed-off-by: Sasha Levin --- kernel/irq/irqdomain.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 607c0c3d3f5e..6aea9d25ab9a 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -154,7 +154,6 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: - domain->fwnode = fwnode; domain->name = kstrdup(fwid->name, GFP_KERNEL); if (!domain->name) { kfree(domain); @@ -163,7 +162,6 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; break; default: - domain->fwnode = fwnode; domain->name = fwid->name; break; } @@ -185,7 +183,6 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, strreplace(name, '/', ':'); domain->name = name; - domain->fwnode = fwnode; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } @@ -201,8 +198,8 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } - fwnode_handle_get(fwnode); - fwnode_dev_initialized(fwnode, true); + domain->fwnode = fwnode_handle_get(fwnode); + fwnode_dev_initialized(domain->fwnode, true); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); From 34d1582dee5708743acf930f01db72e9d2075b4e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 6 Oct 2022 11:53:40 +0200 Subject: [PATCH 26/86] drm/udl: Rename struct udl_drm_connector to struct udl_connector [ Upstream commit 59a811faa74f4326fe2d48d2b334c0ee95922628 ] Remove the _drm_ infix from struct udl_drm_connector and introduce a macro for upcasting from struct drm_connector. No functional changes. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221006095355.23579-2-tzimmermann@suse.de Stable-dep-of: 5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD") Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/udl_connector.c | 19 +++++-------------- drivers/gpu/drm/udl/udl_connector.h | 10 ++++++++-- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index fade4c7adbf7..3c8068626384 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -46,10 +46,7 @@ static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, static int udl_get_modes(struct drm_connector *connector) { - struct udl_drm_connector *udl_connector = - container_of(connector, - struct udl_drm_connector, - connector); + struct udl_connector *udl_connector = to_udl_connector(connector); drm_connector_update_edid_property(connector, udl_connector->edid); if (udl_connector->edid) @@ -74,10 +71,7 @@ static enum drm_connector_status udl_detect(struct drm_connector *connector, bool force) { struct udl_device *udl = to_udl(connector->dev); - struct udl_drm_connector *udl_connector = - container_of(connector, - struct udl_drm_connector, - connector); + struct udl_connector *udl_connector = to_udl_connector(connector); /* cleanup previous edid */ if (udl_connector->edid != NULL) { @@ -94,10 +88,7 @@ udl_detect(struct drm_connector *connector, bool force) static void udl_connector_destroy(struct drm_connector *connector) { - struct udl_drm_connector *udl_connector = - container_of(connector, - struct udl_drm_connector, - connector); + struct udl_connector *udl_connector = to_udl_connector(connector); drm_connector_cleanup(connector); kfree(udl_connector->edid); @@ -120,10 +111,10 @@ static const struct drm_connector_funcs udl_connector_funcs = { struct drm_connector *udl_connector_init(struct drm_device *dev) { - struct udl_drm_connector *udl_connector; + struct udl_connector *udl_connector; struct drm_connector *connector; - udl_connector = kzalloc(sizeof(struct udl_drm_connector), GFP_KERNEL); + udl_connector = kzalloc(sizeof(*udl_connector), GFP_KERNEL); if (!udl_connector) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/udl/udl_connector.h b/drivers/gpu/drm/udl/udl_connector.h index 7f2d392df173..74ad68fd3cc9 100644 --- a/drivers/gpu/drm/udl/udl_connector.h +++ b/drivers/gpu/drm/udl/udl_connector.h @@ -1,15 +1,21 @@ #ifndef __UDL_CONNECTOR_H__ #define __UDL_CONNECTOR_H__ -#include +#include + +#include struct edid; -struct udl_drm_connector { +struct udl_connector { struct drm_connector connector; /* last udl_detect edid */ struct edid *edid; }; +static inline struct udl_connector *to_udl_connector(struct drm_connector *connector) +{ + return container_of(connector, struct udl_connector, connector); +} #endif //__UDL_CONNECTOR_H__ From 5788374a7e0c9a0579bff1e30495e334496f59f3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 6 Oct 2022 11:53:41 +0200 Subject: [PATCH 27/86] drm/udl: Test pixel limit in mode-config's mode-valid function [ Upstream commit c020f66013b6136a68a3a4ad74cc7af3b3310586 ] The sku_pixel_limit is a per-device property, similar to the amount of available video memory. Move the respective mode-valid test from the connector to the mode-config structure. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221006095355.23579-3-tzimmermann@suse.de Stable-dep-of: 5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD") Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/udl_connector.c | 14 -------------- drivers/gpu/drm/udl/udl_modeset.c | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index 3c8068626384..e9539829032c 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -54,19 +54,6 @@ static int udl_get_modes(struct drm_connector *connector) return 0; } -static enum drm_mode_status udl_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - struct udl_device *udl = to_udl(connector->dev); - if (!udl->sku_pixel_limit) - return 0; - - if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) - return MODE_VIRTUAL_Y; - - return 0; -} - static enum drm_connector_status udl_detect(struct drm_connector *connector, bool force) { @@ -97,7 +84,6 @@ static void udl_connector_destroy(struct drm_connector *connector) static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { .get_modes = udl_get_modes, - .mode_valid = udl_mode_valid, }; static const struct drm_connector_funcs udl_connector_funcs = { diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index ec6876f449f3..c7adc29a53a1 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -407,8 +407,22 @@ static const struct drm_simple_display_pipe_funcs udl_simple_display_pipe_funcs * Modesetting */ +static enum drm_mode_status udl_mode_config_mode_valid(struct drm_device *dev, + const struct drm_display_mode *mode) +{ + struct udl_device *udl = to_udl(dev); + + if (udl->sku_pixel_limit) { + if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) + return MODE_MEM; + } + + return MODE_OK; +} + static const struct drm_mode_config_funcs udl_mode_funcs = { .fb_create = drm_gem_fb_create_with_dirty, + .mode_valid = udl_mode_config_mode_valid, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; From cb53ed1326a93e0250dc612d56d98fda4a5fa4ca Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 6 Oct 2022 11:53:42 +0200 Subject: [PATCH 28/86] drm/udl: Use USB timeout constant when reading EDID [ Upstream commit 2c1eafc40e53312864bf2fdccb55052dcbd9e8b2 ] Set the USB control-message timeout to the USB default of 5 seconds. Done for consistency with other uses of usb_control_msg() in udl and other drivers. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221006095355.23579-4-tzimmermann@suse.de Stable-dep-of: 5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD") Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/udl_connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index e9539829032c..cb3d6820eaf9 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -31,7 +31,7 @@ static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, int bval = (i + block * EDID_LENGTH) << 8; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x02, (0x80 | (0x02 << 5)), bval, - 0xA1, read_buff, 2, 1000); + 0xA1, read_buff, 2, USB_CTRL_GET_TIMEOUT); if (ret < 1) { DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret); kfree(read_buff); From a864e01de55dafa5d053f8c3f326440953da9854 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 6 Oct 2022 11:53:43 +0200 Subject: [PATCH 29/86] drm/udl: Various improvements to the connector [ Upstream commit 43858eb41e0dde6e48565c13cdabac95b5d9df90 ] Add style fixes, better error handling and reporting, and minor clean-up changes to the connector code before moving the code to the rest of the modesetting pipeline. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221006095355.23579-5-tzimmermann@suse.de Stable-dep-of: 5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD") Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/udl_connector.c | 64 ++++++++++++++++++----------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index cb3d6820eaf9..538b47ffa67f 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -15,56 +15,64 @@ #include "udl_connector.h" #include "udl_drv.h" -static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, - size_t len) +static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, size_t len) { - int ret, i; - u8 *read_buff; struct udl_device *udl = data; + struct drm_device *dev = &udl->drm; struct usb_device *udev = udl_to_usb_device(udl); + u8 *read_buff; + int ret; + size_t i; read_buff = kmalloc(2, GFP_KERNEL); if (!read_buff) - return -1; + return -ENOMEM; for (i = 0; i < len; i++) { int bval = (i + block * EDID_LENGTH) << 8; + ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x02, (0x80 | (0x02 << 5)), bval, 0xA1, read_buff, 2, USB_CTRL_GET_TIMEOUT); - if (ret < 1) { - DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret); - kfree(read_buff); - return -1; + if (ret < 0) { + drm_err(dev, "Read EDID byte %zu failed err %x\n", i, ret); + goto err_kfree; + } else if (ret < 1) { + ret = -EIO; + drm_err(dev, "Read EDID byte %zu failed\n", i); + goto err_kfree; } + buf[i] = read_buff[1]; } kfree(read_buff); + return 0; + +err_kfree: + kfree(read_buff); + return ret; } -static int udl_get_modes(struct drm_connector *connector) +static int udl_connector_helper_get_modes(struct drm_connector *connector) { struct udl_connector *udl_connector = to_udl_connector(connector); drm_connector_update_edid_property(connector, udl_connector->edid); if (udl_connector->edid) return drm_add_edid_modes(connector, udl_connector->edid); + return 0; } -static enum drm_connector_status -udl_detect(struct drm_connector *connector, bool force) +static enum drm_connector_status udl_connector_detect(struct drm_connector *connector, bool force) { struct udl_device *udl = to_udl(connector->dev); struct udl_connector *udl_connector = to_udl_connector(connector); - /* cleanup previous edid */ - if (udl_connector->edid != NULL) { - kfree(udl_connector->edid); - udl_connector->edid = NULL; - } + /* cleanup previous EDID */ + kfree(udl_connector->edid); udl_connector->edid = drm_do_get_edid(connector, udl_get_edid_block, udl); if (!udl_connector->edid) @@ -79,38 +87,46 @@ static void udl_connector_destroy(struct drm_connector *connector) drm_connector_cleanup(connector); kfree(udl_connector->edid); - kfree(connector); + kfree(udl_connector); } static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { - .get_modes = udl_get_modes, + .get_modes = udl_connector_helper_get_modes, }; static const struct drm_connector_funcs udl_connector_funcs = { .reset = drm_atomic_helper_connector_reset, - .detect = udl_detect, + .detect = udl_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = udl_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; struct drm_connector *udl_connector_init(struct drm_device *dev) { struct udl_connector *udl_connector; struct drm_connector *connector; + int ret; udl_connector = kzalloc(sizeof(*udl_connector), GFP_KERNEL); if (!udl_connector) return ERR_PTR(-ENOMEM); connector = &udl_connector->connector; - drm_connector_init(dev, connector, &udl_connector_funcs, - DRM_MODE_CONNECTOR_VGA); + ret = drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_VGA); + if (ret) + goto err_kfree; + drm_connector_helper_add(connector, &udl_connector_helper_funcs); connector->polled = DRM_CONNECTOR_POLL_HPD | - DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; + DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; return connector; + +err_kfree: + kfree(udl_connector); + return ERR_PTR(ret); } From 9750811a3eeb6f5c87bf3f0bd80b08c26167e324 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 6 Oct 2022 11:53:44 +0200 Subject: [PATCH 30/86] drm/udl: Move connector to modesetting code [ Upstream commit 0862cfd3e22f3f936927f2f7381c2519ba034c6e ] Move the connector next to the rest of the modesetting code. No functional changes. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221006095355.23579-6-tzimmermann@suse.de Stable-dep-of: 5aed213c7c6c ("drm/udl: Remove DRM_CONNECTOR_POLL_HPD") Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/Makefile | 2 +- drivers/gpu/drm/udl/udl_connector.c | 132 ---------------------------- drivers/gpu/drm/udl/udl_connector.h | 21 ----- drivers/gpu/drm/udl/udl_drv.h | 11 +++ drivers/gpu/drm/udl/udl_modeset.c | 122 +++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 154 deletions(-) delete mode 100644 drivers/gpu/drm/udl/udl_connector.c delete mode 100644 drivers/gpu/drm/udl/udl_connector.h diff --git a/drivers/gpu/drm/udl/Makefile b/drivers/gpu/drm/udl/Makefile index 24d61f61d7db..3f6db179455d 100644 --- a/drivers/gpu/drm/udl/Makefile +++ b/drivers/gpu/drm/udl/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -udl-y := udl_drv.o udl_modeset.o udl_connector.o udl_main.o udl_transfer.o +udl-y := udl_drv.o udl_modeset.o udl_main.o udl_transfer.o obj-$(CONFIG_DRM_UDL) := udl.o diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c deleted file mode 100644 index 538b47ffa67f..000000000000 --- a/drivers/gpu/drm/udl/udl_connector.c +++ /dev/null @@ -1,132 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012 Red Hat - * based in parts on udlfb.c: - * Copyright (C) 2009 Roberto De Ioris - * Copyright (C) 2009 Jaya Kumar - * Copyright (C) 2009 Bernie Thompson - */ - -#include -#include -#include -#include - -#include "udl_connector.h" -#include "udl_drv.h" - -static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, size_t len) -{ - struct udl_device *udl = data; - struct drm_device *dev = &udl->drm; - struct usb_device *udev = udl_to_usb_device(udl); - u8 *read_buff; - int ret; - size_t i; - - read_buff = kmalloc(2, GFP_KERNEL); - if (!read_buff) - return -ENOMEM; - - for (i = 0; i < len; i++) { - int bval = (i + block * EDID_LENGTH) << 8; - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - 0x02, (0x80 | (0x02 << 5)), bval, - 0xA1, read_buff, 2, USB_CTRL_GET_TIMEOUT); - if (ret < 0) { - drm_err(dev, "Read EDID byte %zu failed err %x\n", i, ret); - goto err_kfree; - } else if (ret < 1) { - ret = -EIO; - drm_err(dev, "Read EDID byte %zu failed\n", i); - goto err_kfree; - } - - buf[i] = read_buff[1]; - } - - kfree(read_buff); - - return 0; - -err_kfree: - kfree(read_buff); - return ret; -} - -static int udl_connector_helper_get_modes(struct drm_connector *connector) -{ - struct udl_connector *udl_connector = to_udl_connector(connector); - - drm_connector_update_edid_property(connector, udl_connector->edid); - if (udl_connector->edid) - return drm_add_edid_modes(connector, udl_connector->edid); - - return 0; -} - -static enum drm_connector_status udl_connector_detect(struct drm_connector *connector, bool force) -{ - struct udl_device *udl = to_udl(connector->dev); - struct udl_connector *udl_connector = to_udl_connector(connector); - - /* cleanup previous EDID */ - kfree(udl_connector->edid); - - udl_connector->edid = drm_do_get_edid(connector, udl_get_edid_block, udl); - if (!udl_connector->edid) - return connector_status_disconnected; - - return connector_status_connected; -} - -static void udl_connector_destroy(struct drm_connector *connector) -{ - struct udl_connector *udl_connector = to_udl_connector(connector); - - drm_connector_cleanup(connector); - kfree(udl_connector->edid); - kfree(udl_connector); -} - -static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { - .get_modes = udl_connector_helper_get_modes, -}; - -static const struct drm_connector_funcs udl_connector_funcs = { - .reset = drm_atomic_helper_connector_reset, - .detect = udl_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = udl_connector_destroy, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -struct drm_connector *udl_connector_init(struct drm_device *dev) -{ - struct udl_connector *udl_connector; - struct drm_connector *connector; - int ret; - - udl_connector = kzalloc(sizeof(*udl_connector), GFP_KERNEL); - if (!udl_connector) - return ERR_PTR(-ENOMEM); - - connector = &udl_connector->connector; - ret = drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_VGA); - if (ret) - goto err_kfree; - - drm_connector_helper_add(connector, &udl_connector_helper_funcs); - - connector->polled = DRM_CONNECTOR_POLL_HPD | - DRM_CONNECTOR_POLL_CONNECT | - DRM_CONNECTOR_POLL_DISCONNECT; - - return connector; - -err_kfree: - kfree(udl_connector); - return ERR_PTR(ret); -} diff --git a/drivers/gpu/drm/udl/udl_connector.h b/drivers/gpu/drm/udl/udl_connector.h deleted file mode 100644 index 74ad68fd3cc9..000000000000 --- a/drivers/gpu/drm/udl/udl_connector.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __UDL_CONNECTOR_H__ -#define __UDL_CONNECTOR_H__ - -#include - -#include - -struct edid; - -struct udl_connector { - struct drm_connector connector; - /* last udl_detect edid */ - struct edid *edid; -}; - -static inline struct udl_connector *to_udl_connector(struct drm_connector *connector) -{ - return container_of(connector, struct udl_connector, connector); -} - -#endif //__UDL_CONNECTOR_H__ diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index b4cc7cc568c7..d7a3d495f2e7 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -46,6 +46,17 @@ struct urb_list { size_t size; }; +struct udl_connector { + struct drm_connector connector; + /* last udl_detect edid */ + struct edid *edid; +}; + +static inline struct udl_connector *to_udl_connector(struct drm_connector *connector) +{ + return container_of(connector, struct udl_connector, connector); +} + struct udl_device { struct drm_device drm; struct device *dev; diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index c7adc29a53a1..93e7554e83fa 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -11,11 +11,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include "udl_drv.h" @@ -403,6 +405,126 @@ static const struct drm_simple_display_pipe_funcs udl_simple_display_pipe_funcs DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, }; +/* + * Connector + */ + +static int udl_connector_helper_get_modes(struct drm_connector *connector) +{ + struct udl_connector *udl_connector = to_udl_connector(connector); + + drm_connector_update_edid_property(connector, udl_connector->edid); + if (udl_connector->edid) + return drm_add_edid_modes(connector, udl_connector->edid); + + return 0; +} + +static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { + .get_modes = udl_connector_helper_get_modes, +}; + +static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, size_t len) +{ + struct udl_device *udl = data; + struct drm_device *dev = &udl->drm; + struct usb_device *udev = udl_to_usb_device(udl); + u8 *read_buff; + int ret; + size_t i; + + read_buff = kmalloc(2, GFP_KERNEL); + if (!read_buff) + return -ENOMEM; + + for (i = 0; i < len; i++) { + int bval = (i + block * EDID_LENGTH) << 8; + + ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), + 0x02, (0x80 | (0x02 << 5)), bval, + 0xA1, read_buff, 2, USB_CTRL_GET_TIMEOUT); + if (ret < 0) { + drm_err(dev, "Read EDID byte %zu failed err %x\n", i, ret); + goto err_kfree; + } else if (ret < 1) { + ret = -EIO; + drm_err(dev, "Read EDID byte %zu failed\n", i); + goto err_kfree; + } + + buf[i] = read_buff[1]; + } + + kfree(read_buff); + + return 0; + +err_kfree: + kfree(read_buff); + return ret; +} + +static enum drm_connector_status udl_connector_detect(struct drm_connector *connector, bool force) +{ + struct udl_device *udl = to_udl(connector->dev); + struct udl_connector *udl_connector = to_udl_connector(connector); + + /* cleanup previous EDID */ + kfree(udl_connector->edid); + + udl_connector->edid = drm_do_get_edid(connector, udl_get_edid_block, udl); + if (!udl_connector->edid) + return connector_status_disconnected; + + return connector_status_connected; +} + +static void udl_connector_destroy(struct drm_connector *connector) +{ + struct udl_connector *udl_connector = to_udl_connector(connector); + + drm_connector_cleanup(connector); + kfree(udl_connector->edid); + kfree(udl_connector); +} + +static const struct drm_connector_funcs udl_connector_funcs = { + .reset = drm_atomic_helper_connector_reset, + .detect = udl_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = udl_connector_destroy, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +struct drm_connector *udl_connector_init(struct drm_device *dev) +{ + struct udl_connector *udl_connector; + struct drm_connector *connector; + int ret; + + udl_connector = kzalloc(sizeof(*udl_connector), GFP_KERNEL); + if (!udl_connector) + return ERR_PTR(-ENOMEM); + + connector = &udl_connector->connector; + ret = drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_VGA); + if (ret) + goto err_kfree; + + drm_connector_helper_add(connector, &udl_connector_helper_funcs); + + connector->polled = DRM_CONNECTOR_POLL_HPD | + DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; + + return connector; + +err_kfree: + kfree(udl_connector); + return ERR_PTR(ret); +} + /* * Modesetting */ From fa0f0f5ef473cce57f9d6d14098d711f78cccbbf Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 10 May 2024 17:47:08 +0200 Subject: [PATCH 31/86] drm/udl: Remove DRM_CONNECTOR_POLL_HPD [ Upstream commit 5aed213c7c6c4f5dcb1a3ef146f493f18fe703dc ] DisplayLink devices do not generate hotplug events. Remove the poll flag DRM_CONNECTOR_POLL_HPD, as it may not be specified together with DRM_CONNECTOR_POLL_CONNECT or DRM_CONNECTOR_POLL_DISCONNECT. Signed-off-by: Thomas Zimmermann Fixes: afdfc4c6f55f ("drm/udl: Fixed problem with UDL adpater reconnection") Reviewed-by: Jani Nikula Cc: Robert Tarasov Cc: Alex Deucher Cc: Dave Airlie Cc: Sean Paul Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Cc: # v4.15+ Link: https://patchwork.freedesktop.org/patch/msgid/20240510154841.11370-2-tzimmermann@suse.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/udl_modeset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 93e7554e83fa..8f4c4a857b6e 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -514,8 +514,7 @@ struct drm_connector *udl_connector_init(struct drm_device *dev) drm_connector_helper_add(connector, &udl_connector_helper_funcs); - connector->polled = DRM_CONNECTOR_POLL_HPD | - DRM_CONNECTOR_POLL_CONNECT | + connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return connector; From 9d2567e998534470a63dd187b9f5ed694d10e4a3 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:25 +0300 Subject: [PATCH 32/86] drm/i915/dp: Don't switch the LTTPR mode on an active link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 509580fad7323b6a5da27e8365cd488f3b57210e ] Switching to transparent mode leads to a loss of link synchronization, so prevent doing this on an active link. This happened at least on an Intel N100 system / DELL UD22 dock, the LTTPR residing either on the host or the dock. To fix the issue, keep the current mode on an active link, adjusting the LTTPR count accordingly (resetting it to 0 in transparent mode). v2: Adjust code comment during link training about reiniting the LTTPRs. (Ville) Fixes: 7b2a4ab8b0ef ("drm/i915: Switch to LTTPR transparent mode link training") Reported-and-tested-by: Gareth Yu Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10902 Cc: # v5.15+ Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-3-imre.deak@intel.com (cherry picked from commit 211ad49cf8ccfdc798a719b4d1e000d0a8a9e588) Signed-off-by: Tvrtko Ursulin Signed-off-by: Sasha Levin --- .../drm/i915/display/intel_dp_link_training.c | 54 ++++++++++++++++--- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 3d3efcf02011..1d9e4534287b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -103,12 +103,26 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; } -static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) +{ + return intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] == + DP_PHY_REPEATER_MODE_TRANSPARENT; +} + +/* + * Read the LTTPR common capabilities and switch the LTTPR PHYs to + * non-transparent mode if this is supported. Preserve the + * transparent/non-transparent mode on an active link. + * + * Return the number of detected LTTPRs in non-transparent mode or 0 if the + * LTTPRs are in transparent mode or the detection failed. + */ +static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *i915 = to_i915(encoder->base.dev); int lttpr_count; - int i; if (!intel_dp_read_lttpr_common_caps(intel_dp, dpcd)) return 0; @@ -122,6 +136,19 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI if (lttpr_count == 0) return 0; + /* + * Don't change the mode on an active link, to prevent a loss of link + * synchronization. See DP Standard v2.0 3.6.7. about the LTTPR + * resetting its internal state when the mode is changed from + * non-transparent to transparent. + */ + if (intel_dp->link_trained) { + if (lttpr_count < 0 || intel_dp_lttpr_transparent_mode_enabled(intel_dp)) + goto out_reset_lttpr_count; + + return lttpr_count; + } + /* * See DP Standard v2.0 3.6.6.1. about the explicit disabling of * non-transparent mode and the disable->enable non-transparent mode @@ -143,11 +170,25 @@ static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEI encoder->base.base.id, encoder->base.name); intel_dp_set_lttpr_transparent_mode(intel_dp, true); - intel_dp_reset_lttpr_count(intel_dp); - return 0; + goto out_reset_lttpr_count; } + return lttpr_count; + +out_reset_lttpr_count: + intel_dp_reset_lttpr_count(intel_dp); + + return 0; +} + +static int intel_dp_init_lttpr(struct intel_dp *intel_dp, const u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + int lttpr_count; + int i; + + lttpr_count = intel_dp_init_lttpr_phys(intel_dp, dpcd); + for (i = 0; i < lttpr_count; i++) intel_dp_read_lttpr_phy_caps(intel_dp, dpcd, DP_PHY_LTTPR(i)); @@ -1435,8 +1476,9 @@ void intel_dp_start_link_train(struct intel_dp *intel_dp, { bool passed; /* - * TODO: Reiniting LTTPRs here won't be needed once proper connector - * HW state readout is added. + * Reinit the LTTPRs here to ensure that they are switched to + * non-transparent mode. During an earlier LTTPR detection this + * could've been prevented by an active link. */ int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); From 351f1a6ec14b467bdd9bd1322b638d92c4e2f4e1 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Fri, 2 Jun 2023 17:50:50 +0800 Subject: [PATCH 33/86] MIPS: Loongson64: DTS: Add RTC support to Loongson-2K1000 [ Upstream commit e47084e116fccaa43644360d7c0b997979abce3e ] The module is now supported, enable it. Acked-by: Jiaxun Yang Signed-off-by: Binbin Zhou Signed-off-by: WANG Xuerui Signed-off-by: Thomas Bogendoerfer Stable-dep-of: dbb69b9d6234 ("MIPS: dts: loongson: Fix liointc IRQ polarity") Signed-off-by: Sasha Levin --- arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index 9089d1e4f3fe..c0be84a6e81f 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -96,6 +96,13 @@ <0x00000000>; /* int3 */ }; + rtc0: rtc@1fe07800 { + compatible = "loongson,ls2k1000-rtc"; + reg = <0 0x1fe07800 0 0x78>; + interrupt-parent = <&liointc0>; + interrupts = <60 IRQ_TYPE_LEVEL_LOW>; + }; + uart0: serial@1fe00000 { compatible = "ns16550a"; reg = <0 0x1fe00000 0 0x8>; From dfb970b838e9538de5ee30634e24ded9341ffed2 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 May 2024 19:51:22 +0100 Subject: [PATCH 34/86] MIPS: Loongson64: DTS: Fix PCIe port nodes for ls7a [ Upstream commit d89a415ff8d5e0aad4963f2d8ebb0f9e8110b7fa ] Add various required properties to silent warnings: arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi:116.16-297.5: Warning (interrupt_provider): /bus@10000000/pci@1a000000: '#interrupt-cells' found, but node is not an interrupt provider arch/mips/boot/dts/loongson/loongson64_2core_2k1000.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Stable-dep-of: dbb69b9d6234 ("MIPS: dts: loongson: Fix liointc IRQ polarity") Signed-off-by: Sasha Levin --- .../boot/dts/loongson/loongson64-2k1000.dtsi | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index c0be84a6e81f..c1d3092fdd87 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -117,7 +117,6 @@ device_type = "pci"; #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <2>; reg = <0 0x1a000000 0 0x02000000>, <0xfe 0x00000000 0 0x20000000>; @@ -205,93 +204,117 @@ interrupt-parent = <&liointc0>; }; - pci_bridge@9,0 { + pcie@9,0 { compatible = "pci0014,7a19.0", "pci0014,7a19", "pciclass060400", "pciclass0604"; reg = <0x4800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &liointc1 0 IRQ_TYPE_LEVEL_LOW>; + ranges; external-facing; }; - pci_bridge@a,0 { + pcie@a,0 { compatible = "pci0014,7a09.0", "pci0014,7a09", "pciclass060400", "pciclass0604"; reg = <0x5000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &liointc1 1 IRQ_TYPE_LEVEL_LOW>; + ranges; external-facing; }; - pci_bridge@b,0 { + pcie@b,0 { compatible = "pci0014,7a09.0", "pci0014,7a09", "pciclass060400", "pciclass0604"; reg = <0x5800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &liointc1 2 IRQ_TYPE_LEVEL_LOW>; + ranges; external-facing; }; - pci_bridge@c,0 { + pcie@c,0 { compatible = "pci0014,7a09.0", "pci0014,7a09", "pciclass060400", "pciclass0604"; reg = <0x6000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &liointc1 3 IRQ_TYPE_LEVEL_LOW>; + ranges; external-facing; }; - pci_bridge@d,0 { + pcie@d,0 { compatible = "pci0014,7a19.0", "pci0014,7a19", "pciclass060400", "pciclass0604"; reg = <0x6800 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupts = <4 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &liointc1 4 IRQ_TYPE_LEVEL_LOW>; + ranges; external-facing; }; - pci_bridge@e,0 { + pcie@e,0 { compatible = "pci0014,7a09.0", "pci0014,7a09", "pciclass060400", "pciclass0604"; reg = <0x7000 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupts = <5 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &liointc1 5 IRQ_TYPE_LEVEL_LOW>; + ranges; external-facing; }; From 153e085c8d596ac92f170fe4990636d3da0d816d Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 14 Jun 2024 16:40:10 +0100 Subject: [PATCH 35/86] MIPS: dts: loongson: Fix liointc IRQ polarity [ Upstream commit dbb69b9d6234aad23b3ecd33e5bc8a8ae1485b7d ] All internal liointc interrupts are high level triggered. Fixes: b1a792601f26 ("MIPS: Loongson64: DeviceTree for Loongson-2K1000") Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- .../boot/dts/loongson/loongson64-2k1000.dtsi | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index c1d3092fdd87..eec8243be649 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -100,7 +100,7 @@ compatible = "loongson,ls2k1000-rtc"; reg = <0 0x1fe07800 0 0x78>; interrupt-parent = <&liointc0>; - interrupts = <60 IRQ_TYPE_LEVEL_LOW>; + interrupts = <60 IRQ_TYPE_LEVEL_HIGH>; }; uart0: serial@1fe00000 { @@ -108,7 +108,7 @@ reg = <0 0x1fe00000 0 0x8>; clock-frequency = <125000000>; interrupt-parent = <&liointc0>; - interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; no-loopback-test; }; @@ -131,8 +131,8 @@ "pciclass0c03"; reg = <0x1800 0x0 0x0 0x0 0x0>; - interrupts = <12 IRQ_TYPE_LEVEL_LOW>, - <13 IRQ_TYPE_LEVEL_LOW>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, + <13 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&liointc0>; phy-mode = "rgmii-id"; @@ -155,8 +155,8 @@ "loongson, pci-gmac"; reg = <0x1900 0x0 0x0 0x0 0x0>; - interrupts = <14 IRQ_TYPE_LEVEL_LOW>, - <15 IRQ_TYPE_LEVEL_LOW>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&liointc0>; phy-mode = "rgmii-id"; @@ -178,7 +178,7 @@ "pciclass0c03"; reg = <0x2100 0x0 0x0 0x0 0x0>; - interrupts = <18 IRQ_TYPE_LEVEL_LOW>; + interrupts = <18 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; }; @@ -189,7 +189,7 @@ "pciclass0c03"; reg = <0x2200 0x0 0x0 0x0 0x0>; - interrupts = <19 IRQ_TYPE_LEVEL_LOW>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; }; @@ -200,7 +200,7 @@ "pciclass0106"; reg = <0x4000 0x0 0x0 0x0 0x0>; - interrupts = <19 IRQ_TYPE_LEVEL_LOW>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc0>; }; @@ -215,10 +215,10 @@ #size-cells = <2>; device_type = "pci"; #interrupt-cells = <1>; - interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &liointc1 0 IRQ_TYPE_LEVEL_LOW>; + interrupt-map = <0 0 0 0 &liointc1 0 IRQ_TYPE_LEVEL_HIGH>; ranges; external-facing; }; @@ -234,10 +234,10 @@ #size-cells = <2>; device_type = "pci"; #interrupt-cells = <1>; - interrupts = <1 IRQ_TYPE_LEVEL_LOW>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &liointc1 1 IRQ_TYPE_LEVEL_LOW>; + interrupt-map = <0 0 0 0 &liointc1 1 IRQ_TYPE_LEVEL_HIGH>; ranges; external-facing; }; @@ -253,10 +253,10 @@ #size-cells = <2>; device_type = "pci"; #interrupt-cells = <1>; - interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &liointc1 2 IRQ_TYPE_LEVEL_LOW>; + interrupt-map = <0 0 0 0 &liointc1 2 IRQ_TYPE_LEVEL_HIGH>; ranges; external-facing; }; @@ -272,10 +272,10 @@ #size-cells = <2>; device_type = "pci"; #interrupt-cells = <1>; - interrupts = <3 IRQ_TYPE_LEVEL_LOW>; + interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &liointc1 3 IRQ_TYPE_LEVEL_LOW>; + interrupt-map = <0 0 0 0 &liointc1 3 IRQ_TYPE_LEVEL_HIGH>; ranges; external-facing; }; @@ -291,10 +291,10 @@ #size-cells = <2>; device_type = "pci"; #interrupt-cells = <1>; - interrupts = <4 IRQ_TYPE_LEVEL_LOW>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &liointc1 4 IRQ_TYPE_LEVEL_LOW>; + interrupt-map = <0 0 0 0 &liointc1 4 IRQ_TYPE_LEVEL_HIGH>; ranges; external-facing; }; @@ -310,10 +310,10 @@ #size-cells = <2>; device_type = "pci"; #interrupt-cells = <1>; - interrupts = <5 IRQ_TYPE_LEVEL_LOW>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&liointc1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &liointc1 5 IRQ_TYPE_LEVEL_LOW>; + interrupt-map = <0 0 0 0 &liointc1 5 IRQ_TYPE_LEVEL_HIGH>; ranges; external-facing; }; From e531309fad00bc14afc9feee5b49b84f308c99e7 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 14 Jun 2024 16:40:11 +0100 Subject: [PATCH 36/86] MIPS: dts: loongson: Fix ls2k1000-rtc interrupt [ Upstream commit f70fd92df7529e7283e02a6c3a2510075f13ba30 ] The correct interrupt line for RTC is line 8 on liointc1. Fixes: e47084e116fc ("MIPS: Loongson64: DTS: Add RTC support to Loongson-2K1000") Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index eec8243be649..cc7747c5f21f 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -99,8 +99,8 @@ rtc0: rtc@1fe07800 { compatible = "loongson,ls2k1000-rtc"; reg = <0 0x1fe07800 0 0x78>; - interrupt-parent = <&liointc0>; - interrupts = <60 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; }; uart0: serial@1fe00000 { From 08a540fbfaea8d0b2d9b0c034b7760a861a4d2ff Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Tue, 9 May 2023 12:28:54 +0530 Subject: [PATCH 37/86] HID: amd_sfh: Remove duplicate cleanup [ Upstream commit e295709054d59e35be44794dd125efee528ccceb ] A number of duplicate cleanups are performed that are not necessary. As a result, remove duplicate cleanups and use common cleanup. Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Stable-dep-of: 8031b001da70 ("HID: amd_sfh: Move sensor discovery before HID device initialization") Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 27 ++++-------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index c751d12f5df8..34eb419b225e 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -291,18 +291,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->is_any_sensor_enabled = true; cl_data->sensor_sts[i] = SENSOR_ENABLED; rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data); - if (rc) { - mp2_ops->stop(privdata, cl_data->sensor_idx[i]); - status = amd_sfh_wait_for_response - (privdata, cl_data->sensor_idx[i], SENSOR_DISABLED); - if (status != SENSOR_ENABLED) - cl_data->sensor_sts[i] = SENSOR_DISABLED; - dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n", - cl_data->sensor_idx[i], - get_sensor_name(cl_data->sensor_idx[i]), - cl_data->sensor_sts[i]); + if (rc) goto cleanup; - } } else { cl_data->sensor_sts[i] = SENSOR_DISABLED; dev_dbg(dev, "sid 0x%x (%s) status 0x%x\n", @@ -316,25 +306,16 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) } if (!cl_data->is_any_sensor_enabled || (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { - amd_sfh_hid_client_deinit(privdata); - for (i = 0; i < cl_data->num_hid_devices; i++) { - devm_kfree(dev, cl_data->feature_report[i]); - devm_kfree(dev, in_data->input_report[i]); - devm_kfree(dev, cl_data->report_descr[i]); - } dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); - return -EOPNOTSUPP; + rc = -EOPNOTSUPP; + goto cleanup; } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; cleanup: + amd_sfh_hid_client_deinit(privdata); for (i = 0; i < cl_data->num_hid_devices; i++) { - if (in_data->sensor_virt_addr[i]) { - dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), - in_data->sensor_virt_addr[i], - cl_data->sensor_dma_addr[i]); - } devm_kfree(dev, cl_data->feature_report[i]); devm_kfree(dev, in_data->input_report[i]); devm_kfree(dev, cl_data->report_descr[i]); From aba922a30cba3ea48b3e02eb36d6ff09f6ad005a Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Tue, 9 May 2023 12:28:55 +0530 Subject: [PATCH 38/86] HID: amd_sfh: Split sensor and HID initialization [ Upstream commit 5ca505c6b0259606361d8f95b0811b783d4e78f7 ] Sensors are enabled independently of HID device initialization. Sensor initialization should be kept separate in this case, while HID devices should be initialized according to the sensor state. Hence split sensor initialization and HID initialization into separate blocks. Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Stable-dep-of: 8031b001da70 ("HID: amd_sfh: Move sensor discovery before HID device initialization") Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 34eb419b225e..6e65379b10d5 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -214,7 +214,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) struct device *dev; u32 feature_report_size; u32 input_report_size; - int rc, i, status; + int rc, i; u8 cl_idx; req_list = &cl_data->req_list; @@ -285,12 +285,15 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) if (rc) goto cleanup; mp2_ops->start(privdata, info); - status = amd_sfh_wait_for_response - (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); - if (status == SENSOR_ENABLED) { + cl_data->sensor_sts[i] = amd_sfh_wait_for_response + (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); + } + + for (i = 0; i < cl_data->num_hid_devices; i++) { + cl_data->cur_hid_dev = i; + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { cl_data->is_any_sensor_enabled = true; - cl_data->sensor_sts[i] = SENSOR_ENABLED; - rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data); + rc = amdtp_hid_probe(i, cl_data); if (rc) goto cleanup; } else { @@ -304,6 +307,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_idx[i], get_sensor_name(cl_data->sensor_idx[i]), cl_data->sensor_sts[i]); } + if (!cl_data->is_any_sensor_enabled || (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); From c14acf517cee4d51dafb3dc3bf402ea7f3a03348 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Thu, 18 Jul 2024 16:46:16 +0530 Subject: [PATCH 39/86] HID: amd_sfh: Move sensor discovery before HID device initialization [ Upstream commit 8031b001da700474c11d28629581480b12a0d8d4 ] Sensors discovery is independent of HID device initialization. If sensor discovery fails after HID initialization, then the HID device needs to be deinitialized. Therefore, sensors discovery should be moved before HID device initialization. Fixes: 7bcfdab3f0c6 ("HID: amd_sfh: if no sensors are enabled, clean up") Tested-by: Aurinko Signed-off-by: Basavaraj Natikar Link: https://patch.msgid.link/20240718111616.3012155-1-Basavaraj.Natikar@amd.com Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 6e65379b10d5..4343fef7dd83 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -287,12 +287,22 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) mp2_ops->start(privdata, info); cl_data->sensor_sts[i] = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); + + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + cl_data->is_any_sensor_enabled = true; + } + + if (!cl_data->is_any_sensor_enabled || + (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { + dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", + cl_data->is_any_sensor_enabled); + rc = -EOPNOTSUPP; + goto cleanup; } for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->cur_hid_dev = i; if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { - cl_data->is_any_sensor_enabled = true; rc = amdtp_hid_probe(i, cl_data); if (rc) goto cleanup; @@ -308,12 +318,6 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_sts[i]); } - if (!cl_data->is_any_sensor_enabled || - (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { - dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); - rc = -EOPNOTSUPP; - goto cleanup; - } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; From ebebba4d357b6c67f96776a48ddbaf0060fa4c10 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 18 Jul 2024 18:58:46 +0200 Subject: [PATCH 40/86] drm/nouveau: prime: fix refcount underflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a9bf3efc33f1fbf88787a277f7349459283c9b95 ] Calling nouveau_bo_ref() on a nouveau_bo without initializing it (and hence the backing ttm_bo) leads to a refcount underflow. Instead of calling nouveau_bo_ref() in the unwind path of drm_gem_object_init(), clean things up manually. Fixes: ab9ccb96a6e6 ("drm/nouveau: use prime helpers") Reviewed-by: Ben Skeggs Reviewed-by: Christian König Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240718165959.3983-2-dakr@kernel.org (cherry picked from commit 1b93f3e89d03cfc576636e195466a0d728ad8de5) Signed-off-by: Danilo Krummrich Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_prime.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 9608121e49b7..8340d55aaa98 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -63,7 +63,8 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, * to the caller, instead of a normal nouveau_bo ttm reference. */ ret = drm_gem_object_init(dev, &nvbo->bo.base, size); if (ret) { - nouveau_bo_ref(NULL, &nvbo); + drm_gem_object_release(&nvbo->bo.base); + kfree(nvbo); obj = ERR_PTR(-ENOMEM); goto unlock; } From e3ccbb76e1e2f79b5db8d691e17ac844a9e71ee0 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Fri, 19 Jul 2024 11:36:27 -0500 Subject: [PATCH 41/86] drm/vmwgfx: Fix overlay when using Screen Targets [ Upstream commit cb372a505a994cb39aa75acfb8b3bcf94787cf94 ] This code was never updated to support Screen Targets. Fixes a bug where Xv playback displays a green screen instead of actual video contents when 3D acceleration is disabled in the guest. Fixes: c8261a961ece ("vmwgfx: Major KMS refactoring / cleanup in preparation of screen targets") Reported-by: Doug Brown Closes: https://lore.kernel.org/all/bd9cb3c7-90e8-435d-bc28-0e38fee58977@schmorgal.com Signed-off-by: Ian Forbes Tested-by: Doug Brown Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240719163627.20888-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index abc354ead4e8..5dcddcb59a6f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -98,7 +98,7 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv, { struct vmw_escape_video_flush *flush; size_t fifo_size; - bool have_so = (dev_priv->active_display_unit == vmw_du_screen_object); + bool have_so = (dev_priv->active_display_unit != vmw_du_legacy); int i, num_items; SVGAGuestPtr ptr; From cb1b65d0e11df681df814d8a3b9729b277c2e465 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Mon, 24 Jun 2024 15:59:51 -0500 Subject: [PATCH 42/86] drm/vmwgfx: Trigger a modeset when the screen moves [ Upstream commit 75c3e8a26a35d4f3eee299b3cc7e465f166f4e2d ] When multi-monitor is cycled the X,Y position of the Screen Target will likely change but the resolution will not. We need to trigger a modeset when this occurs in order to recreate the Screen Target with the correct X,Y position. Fixes a bug where multiple displays are shown in a single scrollable host window rather than in 2+ windows on separate host displays. Fixes: 426826933109 ("drm/vmwgfx: Filter modes which exceed graphics memory") Signed-off-by: Ian Forbes Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240624205951.23343-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 29 +++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 6dd33d1258d1..e98fde90f4e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1015,6 +1015,32 @@ vmw_stdu_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } +/* + * Trigger a modeset if the X,Y position of the Screen Target changes. + * This is needed when multi-mon is cycled. The original Screen Target will have + * the same mode but its relative X,Y position in the topology will change. + */ +static int vmw_stdu_connector_atomic_check(struct drm_connector *conn, + struct drm_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct vmw_screen_target_display_unit *du; + struct drm_crtc_state *new_crtc_state; + + conn_state = drm_atomic_get_connector_state(state, conn); + du = vmw_connector_to_stdu(conn); + + if (!conn_state->crtc) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (du->base.gui_x != du->base.set_gui_x || + du->base.gui_y != du->base.set_gui_y) + new_crtc_state->mode_changed = true; + + return 0; +} + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, @@ -1029,7 +1055,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { static const struct drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { .get_modes = vmw_connector_get_modes, - .mode_valid = vmw_stdu_connector_mode_valid + .mode_valid = vmw_stdu_connector_mode_valid, + .atomic_check = vmw_stdu_connector_atomic_check, }; From d06daf0ad645d9225a3ff6958dd82e1f3988fa64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jul 2024 09:27:45 +0000 Subject: [PATCH 43/86] sched: act_ct: take care of padding in struct zones_ht_key [ Upstream commit 2191a54f63225b548fd8346be3611c3219a24738 ] Blamed commit increased lookup key size from 2 bytes to 16 bytes, because zones_ht_key got a struct net pointer. Make sure rhashtable_lookup() is not using the padding bytes which are not initialized. BUG: KMSAN: uninit-value in rht_ptr_rcu include/linux/rhashtable.h:376 [inline] BUG: KMSAN: uninit-value in __rhashtable_lookup include/linux/rhashtable.h:607 [inline] BUG: KMSAN: uninit-value in rhashtable_lookup include/linux/rhashtable.h:646 [inline] BUG: KMSAN: uninit-value in rhashtable_lookup_fast include/linux/rhashtable.h:672 [inline] BUG: KMSAN: uninit-value in tcf_ct_flow_table_get+0x611/0x2260 net/sched/act_ct.c:329 rht_ptr_rcu include/linux/rhashtable.h:376 [inline] __rhashtable_lookup include/linux/rhashtable.h:607 [inline] rhashtable_lookup include/linux/rhashtable.h:646 [inline] rhashtable_lookup_fast include/linux/rhashtable.h:672 [inline] tcf_ct_flow_table_get+0x611/0x2260 net/sched/act_ct.c:329 tcf_ct_init+0xa67/0x2890 net/sched/act_ct.c:1408 tcf_action_init_1+0x6cc/0xb30 net/sched/act_api.c:1425 tcf_action_init+0x458/0xf00 net/sched/act_api.c:1488 tcf_action_add net/sched/act_api.c:2061 [inline] tc_ctl_action+0x4be/0x19d0 net/sched/act_api.c:2118 rtnetlink_rcv_msg+0x12fc/0x1410 net/core/rtnetlink.c:6647 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2550 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6665 netlink_unicast_kernel net/netlink/af_netlink.c:1331 [inline] netlink_unicast+0xf52/0x1260 net/netlink/af_netlink.c:1357 netlink_sendmsg+0x10da/0x11e0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 ____sys_sendmsg+0x877/0xb60 net/socket.c:2597 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2651 __sys_sendmsg net/socket.c:2680 [inline] __do_sys_sendmsg net/socket.c:2689 [inline] __se_sys_sendmsg net/socket.c:2687 [inline] __x64_sys_sendmsg+0x307/0x4a0 net/socket.c:2687 x64_sys_call+0x2dd6/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable key created at: tcf_ct_flow_table_get+0x4a/0x2260 net/sched/act_ct.c:324 tcf_ct_init+0xa67/0x2890 net/sched/act_ct.c:1408 Fixes: 88c67aeb1407 ("sched: act_ct: add netns into the key of tcf_ct_flow_table") Reported-by: syzbot+1b5e4e187cc586d05ea0@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Xin Long Reviewed-by: Simon Horman Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_ct.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 44ff7f356ec1..9594dbc32165 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -42,6 +42,8 @@ static DEFINE_MUTEX(zones_mutex); struct zones_ht_key { struct net *net; u16 zone; + /* Note : pad[] must be the last field. */ + u8 pad[]; }; struct tcf_ct_flow_table { @@ -58,7 +60,7 @@ struct tcf_ct_flow_table { static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), .key_offset = offsetof(struct tcf_ct_flow_table, key), - .key_len = sizeof_field(struct tcf_ct_flow_table, key), + .key_len = offsetof(struct zones_ht_key, pad), .automatic_shrinking = true, }; From 7e4a051ac21e8c2642b9892eba46ab15728bf410 Mon Sep 17 00:00:00 2001 From: songxiebing Date: Fri, 26 Jul 2024 18:07:26 +0800 Subject: [PATCH 44/86] ALSA: hda: conexant: Fix headset auto detect fail in the polling mode [ Upstream commit e60dc98122110594d0290845160f12916192fc6d ] The previous fix (7aeb25908648) only handles the unsol_event reporting during interrupts and does not include the polling mode used to set jackroll_ms, so now we are replacing it with snd_hda_jack_detect_enable_callback. Fixes: 7aeb25908648 ("ALSA: hda/conexant: Fix headset auto detect fail in cx8070 and SN6140") Co-developed-by: bo liu Signed-off-by: bo liu Signed-off-by: songxiebing Link: https://patch.msgid.link/20240726100726.50824-1-soxiebing@163.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_conexant.c | 54 ++++++---------------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e8209178d87b..af921364195e 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,12 +21,6 @@ #include "hda_jack.h" #include "hda_generic.h" -enum { - CX_HEADSET_NOPRESENT = 0, - CX_HEADSET_PARTPRESENT, - CX_HEADSET_ALLPRESENT, -}; - struct conexant_spec { struct hda_gen_spec gen; @@ -48,7 +42,6 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - unsigned int headset_present_flag; bool is_cx8070_sn6140; }; @@ -250,48 +243,19 @@ static void cx_process_headset_plugin(struct hda_codec *codec) } } -static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +static void cx_update_headset_mic_vref(struct hda_codec *codec, struct hda_jack_callback *event) { - unsigned int phone_present, mic_persent, phone_tag, mic_tag; - struct conexant_spec *spec = codec->spec; + unsigned int mic_present; /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, * the node 19 can only be config to microphone or disabled. * Check hp&mic tag to process headset pulgin&plugout. */ - phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || - (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { - phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); - if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ - spec->headset_present_flag = CX_HEADSET_NOPRESENT; - snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); - return; - } - if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { - spec->headset_present_flag = CX_HEADSET_PARTPRESENT; - } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { - mic_persent = snd_hda_codec_read(codec, 0x19, 0, - AC_VERB_GET_PIN_SENSE, 0x0); - /* headset is present */ - if ((phone_present & AC_PINSENSE_PRESENCE) && - (mic_persent & AC_PINSENSE_PRESENCE)) { - cx_process_headset_plugin(codec); - spec->headset_present_flag = CX_HEADSET_ALLPRESENT; - } - } - } -} - -static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) -{ - struct conexant_spec *spec = codec->spec; - - if (spec->is_cx8070_sn6140) - cx_update_headset_mic_vref(codec, res); - - snd_hda_jack_unsol_event(codec, res); + mic_present = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(mic_present & AC_PINSENSE_PRESENCE)) /* mic plugout */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + else + cx_process_headset_plugin(codec); } #ifdef CONFIG_PM @@ -307,7 +271,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = cx_jack_unsol_event, + .unsol_event = snd_hda_jack_unsol_event, #ifdef CONFIG_PM .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, @@ -1167,7 +1131,7 @@ static int patch_conexant_auto(struct hda_codec *codec) case 0x14f11f86: case 0x14f11f87: spec->is_cx8070_sn6140 = true; - spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_jack_detect_enable_callback(codec, 0x19, cx_update_headset_mic_vref); break; } From e683b94a9caf34d961ad2b69efd5af6d9de4fa82 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Jul 2024 10:40:03 -0400 Subject: [PATCH 45/86] Bluetooth: hci_sync: Fix suspending with wrong filter policy [ Upstream commit 96b82af36efaa1787946e021aa3dc5410c05beeb ] When suspending the scan filter policy cannot be 0x00 (no acceptlist) since that means the host has to process every advertisement report waking up the system, so this attempts to check if hdev is marked as suspended and if the resulting filter policy would be 0x00 (no acceptlist) then skip passive scanning if thre no devices in the acceptlist otherwise reset the filter policy to 0x01 so the acceptlist is used since the devices programmed there can still wakeup be system. Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 57302021b7eb..320fc1e6dff2 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2837,6 +2837,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) */ filter_policy = hci_update_accept_list_sync(hdev); + /* If suspended and filter_policy set to 0x00 (no acceptlist) then + * passive scanning cannot be started since that would require the host + * to be woken up to process the reports. + */ + if (hdev->suspended && !filter_policy) { + /* Check if accept list is empty then there is no need to scan + * while suspended. + */ + if (list_empty(&hdev->le_accept_list)) + return 0; + + /* If there are devices is the accept_list that means some + * devices could not be programmed which in non-suspended case + * means filter_policy needs to be set to 0x00 so the host needs + * to filter, but since this is treating suspended case we + * can ignore device needing host to filter to allow devices in + * the acceptlist to be able to wakeup the system. + */ + filter_policy = 0x01; + } + /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support From da391e9733413b299ea25610610862b3dfb2b355 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Fri, 26 Jul 2024 15:06:50 +0800 Subject: [PATCH 46/86] net: axienet: start napi before enabling Rx/Tx [ Upstream commit 799a829507506924add8a7620493adc1c3cfda30 ] softirq may get lost if an Rx interrupt comes before we call napi_enable. Move napi_enable in front of axienet_setoptions(), which turns on the device, to address the issue. Link: https://lists.gnu.org/archive/html/qemu-devel/2024-07/msg06160.html Fixes: cc37610caaf8 ("net: axienet: implement NAPI and GRO receive") Signed-off-by: Andy Chiu Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 5ea9dc251dd9..ff777735be66 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1825,9 +1825,9 @@ static void axienet_dma_err_handler(struct work_struct *work) ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); - axienet_setoptions(ndev, lp->options); napi_enable(&lp->napi_rx); napi_enable(&lp->napi_tx); + axienet_setoptions(ndev, lp->options); } /** From 072e4646e625eb3c660338afddc4577ac4fd8f37 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 26 Jul 2024 17:19:53 -0700 Subject: [PATCH 47/86] rtnetlink: Don't ignore IFLA_TARGET_NETNSID when ifname is specified in rtnl_dellink(). [ Upstream commit 9415d375d8520e0ed55f0c0b058928da9a5b5b3d ] The cited commit accidentally replaced tgt_net with net in rtnl_dellink(). As a result, IFLA_TARGET_NETNSID is ignored if the interface is specified with IFLA_IFNAME or IFLA_ALT_IFNAME. Let's pass tgt_net to rtnl_dev_get(). Fixes: cc6090e985d7 ("net: rtnetlink: introduce helper to get net_device instance by ifname") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1163226c025c..be663a7382ce 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3178,7 +3178,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(tgt_net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else From 3dbc58774e581e07f0b48a0b9fd6cfe31672eca5 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Fri, 26 Jul 2024 20:17:09 +0200 Subject: [PATCH 48/86] ice: respect netif readiness in AF_XDP ZC related ndo's [ Upstream commit ec145a18687fec8dd97eeb4f30057fa4debef577 ] Address a scenario in which XSK ZC Tx produces descriptors to XDP Tx ring when link is either not yet fully initialized or process of stopping the netdev has already started. To avoid this, add checks against carrier readiness in ice_xsk_wakeup() and in ice_xmit_zc(). One could argue that bailing out early in ice_xsk_wakeup() would be sufficient but given the fact that we produce Tx descriptors on behalf of NAPI that is triggered for Rx traffic, the latter is also needed. Bringing link up is an asynchronous event executed within ice_service_task so even though interface has been brought up there is still a time frame where link is not yet ok. Without this patch, when AF_XDP ZC Tx is used simultaneously with stack Tx, Tx timeouts occur after going through link flap (admin brings interface down then up again). HW seem to be unable to transmit descriptor to the wire after HW tail register bump which in turn causes bit __QUEUE_STATE_STACK_XOFF to be set forever as netdev_tx_completed_queue() sees no cleaned bytes on the input. Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API") Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Reviewed-by: Shannon Nelson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Michal Kubiak Signed-off-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index b917f271cdac..61e4730bba59 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -937,6 +937,10 @@ bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) ice_clean_xdp_irq_zc(xdp_ring); + if (!netif_carrier_ok(xdp_ring->vsi->netdev) || + !netif_running(xdp_ring->vsi->netdev)) + return true; + budget = ICE_DESC_UNUSED(xdp_ring); budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); @@ -980,7 +984,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_VSI_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) From 15115033f056cbd7649b8e1806287f71bdb7ce5c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 26 Jul 2024 20:17:10 +0200 Subject: [PATCH 49/86] ice: don't busy wait for Rx queue disable in ice_qp_dis() [ Upstream commit 1ff72a2f67791cd4ddad19ed830445f57b30e992 ] When ice driver is spammed with multiple xdpsock instances and flow control is enabled, there are cases when Rx queue gets stuck and unable to reflect the disable state in QRX_CTRL register. Similar issue has previously been addressed in commit 13a6233b033f ("ice: Add support to enable/disable all Rx queues before waiting"). To workaround this, let us simply not wait for a disabled state as later patch will make sure that regardless of the encountered error in the process of disabling a queue pair, the Rx queue will be enabled. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Reviewed-by: Shannon Nelson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_xsk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 61e4730bba59..ebc017dd245f 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -191,10 +191,8 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); - if (err) - return err; + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); From 8782f0fcb19dc59804d1bfea5a39343410328d20 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 26 Jul 2024 20:17:11 +0200 Subject: [PATCH 50/86] ice: replace synchronize_rcu with synchronize_net [ Upstream commit 405d9999aa0b4ae467ef391d1d9c7e0d30ad0841 ] Given that ice_qp_dis() is called under rtnl_lock, synchronize_net() can be called instead of synchronize_rcu() so that XDP rings can finish its job in a faster way. Also let us do this as earlier in XSK queue disable flow. Additionally, turn off regular Tx queue before disabling irqs and NAPI. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Reviewed-by: Shannon Nelson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index ebc017dd245f..2677d7c86a6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -41,10 +41,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) { - synchronize_rcu(); + if (ice_is_xdp_ena_vsi(vsi)) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -172,11 +170,12 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) usleep_range(1000, 2000); } + synchronize_net(); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + ice_qvec_dis_irq(vsi, rx_ring, q_vector); ice_qvec_toggle_napi(vsi, q_vector, false); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); if (err) From 5a80b682e3e161784edf1550de9b8602a5013140 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 26 Jul 2024 20:17:15 +0200 Subject: [PATCH 51/86] ice: add missing WRITE_ONCE when clearing ice_rx_ring::xdp_prog [ Upstream commit 6044ca26210ba72b3dcc649fae1cbedd9e6ab018 ] It is read by data path and modified from process context on remote cpu so it is needed to use WRITE_ONCE to clear the pointer. Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Shannon Nelson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dbe80e5053a8..bd62781191b3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -454,7 +454,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - rx_ring->xdp_prog = NULL; + WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; From 8b424c9e44111c5a76f41c6b741f8d4c4179d876 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Mon, 29 Jul 2024 14:28:16 +0200 Subject: [PATCH 52/86] net/iucv: fix use after free in iucv_sock_close() [ Upstream commit f558120cd709682b739207b48cf7479fd9568431 ] iucv_sever_path() is called from process context and from bh context. iucv->path is used as indicator whether somebody else is taking care of severing the path (or it is already removed / never existed). This needs to be done with atomic compare and swap, otherwise there is a small window where iucv_sock_close() will try to work with a path that has already been severed and freed by iucv_callback_connrej() called by iucv_tasklet_fn(). Example: [452744.123844] Call Trace: [452744.123845] ([<0000001e87f03880>] 0x1e87f03880) [452744.123966] [<00000000d593001e>] iucv_path_sever+0x96/0x138 [452744.124330] [<000003ff801ddbca>] iucv_sever_path+0xc2/0xd0 [af_iucv] [452744.124336] [<000003ff801e01b6>] iucv_sock_close+0xa6/0x310 [af_iucv] [452744.124341] [<000003ff801e08cc>] iucv_sock_release+0x3c/0xd0 [af_iucv] [452744.124345] [<00000000d574794e>] __sock_release+0x5e/0xe8 [452744.124815] [<00000000d5747a0c>] sock_close+0x34/0x48 [452744.124820] [<00000000d5421642>] __fput+0xba/0x268 [452744.124826] [<00000000d51b382c>] task_work_run+0xbc/0xf0 [452744.124832] [<00000000d5145710>] do_notify_resume+0x88/0x90 [452744.124841] [<00000000d5978096>] system_call+0xe2/0x2c8 [452744.125319] Last Breaking-Event-Address: [452744.125321] [<00000000d5930018>] iucv_path_sever+0x90/0x138 [452744.125324] [452744.125325] Kernel panic - not syncing: Fatal exception in interrupt Note that bh_lock_sock() is not serializing the tasklet context against process context, because the check for sock_owned_by_user() and corresponding handling is missing. Ideas for a future clean-up patch: A) Correct usage of bh_lock_sock() in tasklet context, as described in Link: https://lore.kernel.org/netdev/1280155406.2899.407.camel@edumazet-laptop/ Re-enqueue, if needed. This may require adding return values to the tasklet functions and thus changes to all users of iucv. B) Change iucv tasklet into worker and use only lock_sock() in af_iucv. Fixes: 7d316b945352 ("af_iucv: remove IUCV-pathes completely") Reviewed-by: Halil Pasic Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20240729122818.947756-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/iucv/af_iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 498a0c35b7bb..815b1df0b2d1 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_path *path = iucv->path; - if (iucv->path) { - iucv->path = NULL; + /* Whoever resets the path pointer, must sever and free it. */ + if (xchg(&iucv->path, NULL)) { if (with_user_data) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); From c786c37354faad5e56ba62b3f2d4c6a606a51962 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Tue, 30 Jul 2024 09:25:05 +0530 Subject: [PATCH 53/86] drm/i915/hdcp: Fix HDCP2_STREAM_STATUS macro [ Upstream commit 555069117390a5d581863bc797fb546bb4417c31 ] Fix HDCP2_STREAM_STATUS macro, it called pipe instead of port never threw a compile error as no one used it. --v2 -Add Fixes [Jani] Fixes: d631b984cc90 ("drm/i915/hdcp: Add HDCP 2.2 stream register") Signed-off-by: Suraj Kandpal Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240730035505.3759899-1-suraj.kandpal@intel.com (cherry picked from commit 73d7cd542bbd0a7c6881ea0df5255f190a1e7236) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_hdcp_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index 2a3733e8966c..2702cc8c88d8 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -249,7 +249,7 @@ #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (GRAPHICS_VER(dev_priv) >= 12 ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ - PIPE_HDCP2_STREAM_STATUS(pipe)) + PIPE_HDCP2_STREAM_STATUS(port)) #define _PORTA_HDCP2_AUTH_STREAM 0x66F00 #define _PORTB_HDCP2_AUTH_STREAM 0x66F04 From 9bd159d3e56ab7f6e32ed362abb1c907329e4ca5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Jul 2024 11:06:56 -0500 Subject: [PATCH 54/86] net: mvpp2: Don't re-use loop iterator [ Upstream commit 0aa3ca956c46d849775eae1816cef8fe4bc8b50e ] This function has a nested loop. The problem is that both the inside and outside loop use the same variable as an iterator. I found this via static analysis so I'm not sure the impact. It could be that it loops forever or, more likely, the loop exits early. Fixes: 3a616b92a9d1 ("net: mvpp2: Add TX flow control support for jumbo frames") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Link: https://patch.msgid.link/eaa8f403-7779-4d81-973d-a9ecddc0bf6f@stanley.mountain Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 2f80ee84c7ec..bbcdab562513 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -953,13 +953,13 @@ static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, static void mvpp2_bm_pool_update_priv_fc(struct mvpp2 *priv, bool en) { struct mvpp2_port *port; - int i; + int i, j; for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; if (port->priv->percpu_pools) { - for (i = 0; i < port->nrxqs; i++) - mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], + for (j = 0; j < port->nrxqs; j++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[j], port->tx_fc & en); } else { mvpp2_bm_pool_update_fc(port, port->pool_long, port->tx_fc & en); From 92afcc310038ebe5d66c689bb0bf418f5451201c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 31 Jul 2024 19:05:15 +0200 Subject: [PATCH 55/86] ALSA: hda: Conditionally use snooping for AMD HDMI [ Upstream commit 478689b5990deb626a0b3f1ebf165979914d6be4 ] The recent regression report revealed that the use of WC pages for AMD HDMI device together with AMD IOMMU leads to unexpected truncation or noises. The issue seems triggered by the change in the kernel core memory allocation that enables IOMMU driver to use always S/G buffers. Meanwhile, the use of WC pages has been a workaround for the similar issue with standard pages in the past. So, now we need to apply the workaround conditionally, namely, only when IOMMU isn't in place. This patch modifies the workaround code to check the DMA ops at first and apply the snoop-off only when needed. Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP") Link: https://bugzilla.kernel.org/show_bug.cgi?id=219087 Link: https://patch.msgid.link/20240731170521.31714-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_controller.h | 2 +- sound/pci/hda/hda_intel.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 8556031bcd68..f31cb31d4636 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -28,7 +28,7 @@ #else #define AZX_DCAPS_I915_COMPONENT 0 /* NOP */ #endif -/* 14 unused */ +#define AZX_DCAPS_AMD_ALLOC_FIX (1 << 14) /* AMD allocation workaround */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ #define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index a26f2a2d44cf..695026c647e1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86 /* for snoop control */ +#include #include #include #endif @@ -300,7 +301,7 @@ enum { /* quirks for ATI HDMI with snoop off */ #define AZX_DCAPS_PRESET_ATI_HDMI_NS \ - (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF) + (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_AMD_ALLOC_FIX) /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ @@ -1718,6 +1719,13 @@ static void azx_check_snoop_available(struct azx *chip) if (chip->driver_caps & AZX_DCAPS_SNOOP_OFF) snoop = false; +#ifdef CONFIG_X86 + /* check the presence of DMA ops (i.e. IOMMU), disable snoop conditionally */ + if ((chip->driver_caps & AZX_DCAPS_AMD_ALLOC_FIX) && + !get_dma_ops(chip->card->dev)) + snoop = false; +#endif + chip->snoop = snoop; if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); From 95590a4929027769af35b153645c0ab6fd22b29b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 25 Jul 2024 12:28:20 -0700 Subject: [PATCH 56/86] netfilter: iptables: Fix null-ptr-deref in iptable_nat_table_init(). [ Upstream commit 5830aa863981d43560748aa93589c0695191d95d ] We had a report that iptables-restore sometimes triggered null-ptr-deref at boot time. [0] The problem is that iptable_nat_table_init() is exposed to user space before the kernel fully initialises netns. In the small race window, a user could call iptable_nat_table_init() that accesses net_generic(net, iptable_nat_net_id), which is available only after registering iptable_nat_net_ops. Let's call register_pernet_subsys() before xt_register_template(). [0]: bpfilter: Loaded bpfilter_umh pid 11702 Started bpfilter BUG: kernel NULL pointer dereference, address: 0000000000000013 PF: supervisor write access in kernel mode PF: error_code(0x0002) - not-present page PGD 0 P4D 0 PREEMPT SMP NOPTI CPU: 2 PID: 11879 Comm: iptables-restor Not tainted 6.1.92-99.174.amzn2023.x86_64 #1 Hardware name: Amazon EC2 c6i.4xlarge/, BIOS 1.0 10/16/2017 RIP: 0010:iptable_nat_table_init (net/ipv4/netfilter/iptable_nat.c:87 net/ipv4/netfilter/iptable_nat.c:121) iptable_nat Code: 10 4c 89 f6 48 89 ef e8 0b 19 bb ff 41 89 c4 85 c0 75 38 41 83 c7 01 49 83 c6 28 41 83 ff 04 75 dc 48 8b 44 24 08 48 8b 0c 24 <48> 89 08 4c 89 ef e8 a2 3b a2 cf 48 83 c4 10 44 89 e0 5b 5d 41 5c RSP: 0018:ffffbef902843cd0 EFLAGS: 00010246 RAX: 0000000000000013 RBX: ffff9f4b052caa20 RCX: ffff9f4b20988d80 RDX: 0000000000000000 RSI: 0000000000000064 RDI: ffffffffc04201c0 RBP: ffff9f4b29394000 R08: ffff9f4b07f77258 R09: ffff9f4b07f77240 R10: 0000000000000000 R11: ffff9f4b09635388 R12: 0000000000000000 R13: ffff9f4b1a3c6c00 R14: ffff9f4b20988e20 R15: 0000000000000004 FS: 00007f6284340000(0000) GS:ffff9f51fe280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000013 CR3: 00000001d10a6005 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? show_trace_log_lvl (arch/x86/kernel/dumpstack.c:259) ? show_trace_log_lvl (arch/x86/kernel/dumpstack.c:259) ? xt_find_table_lock (net/netfilter/x_tables.c:1259) ? __die_body.cold (arch/x86/kernel/dumpstack.c:478 arch/x86/kernel/dumpstack.c:420) ? page_fault_oops (arch/x86/mm/fault.c:727) ? exc_page_fault (./arch/x86/include/asm/irqflags.h:40 ./arch/x86/include/asm/irqflags.h:75 arch/x86/mm/fault.c:1470 arch/x86/mm/fault.c:1518) ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:570) ? iptable_nat_table_init (net/ipv4/netfilter/iptable_nat.c:87 net/ipv4/netfilter/iptable_nat.c:121) iptable_nat xt_find_table_lock (net/netfilter/x_tables.c:1259) xt_request_find_table_lock (net/netfilter/x_tables.c:1287) get_info (net/ipv4/netfilter/ip_tables.c:965) ? security_capable (security/security.c:809 (discriminator 13)) ? ns_capable (kernel/capability.c:376 kernel/capability.c:397) ? do_ipt_get_ctl (net/ipv4/netfilter/ip_tables.c:1656) ? bpfilter_send_req (net/bpfilter/bpfilter_kern.c:52) bpfilter nf_getsockopt (net/netfilter/nf_sockopt.c:116) ip_getsockopt (net/ipv4/ip_sockglue.c:1827) __sys_getsockopt (net/socket.c:2327) __x64_sys_getsockopt (net/socket.c:2342 net/socket.c:2339 net/socket.c:2339) do_syscall_64 (arch/x86/entry/common.c:51 arch/x86/entry/common.c:81) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) RIP: 0033:0x7f62844685ee Code: 48 8b 0d 45 28 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 37 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 0a c3 66 0f 1f 84 00 00 00 00 00 48 8b 15 09 RSP: 002b:00007ffd1f83d638 EFLAGS: 00000246 ORIG_RAX: 0000000000000037 RAX: ffffffffffffffda RBX: 00007ffd1f83d680 RCX: 00007f62844685ee RDX: 0000000000000040 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 0000000000000004 R08: 00007ffd1f83d670 R09: 0000558798ffa2a0 R10: 00007ffd1f83d680 R11: 0000000000000246 R12: 00007ffd1f83e3b2 R13: 00007f628455baa0 R14: 00007ffd1f83d7b0 R15: 00007f628457a008 Modules linked in: iptable_nat(+) bpfilter rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache veth xt_state xt_connmark xt_nat xt_statistic xt_MASQUERADE xt_mark xt_addrtype ipt_REJECT nf_reject_ipv4 nft_chain_nat nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment nft_compat nf_tables nfnetlink overlay nls_ascii nls_cp437 vfat fat ghash_clmulni_intel aesni_intel ena crypto_simd ptp cryptd i8042 pps_core serio button sunrpc sch_fq_codel configfs loop dm_mod fuse dax dmi_sysfs crc32_pclmul crc32c_intel efivarfs CR2: 0000000000000013 Fixes: fdacd57c79b7 ("netfilter: x_tables: never register tables by default") Reported-by: Takahiro Kawahara Signed-off-by: Kuniyuki Iwashima Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv4/netfilter/iptable_nat.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 56f6ecc43451..12ca666d6e2c 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = { static int __init iptable_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv4_table, - iptable_nat_table_init); + int ret; + /* net->gen->ptr[iptable_nat_net_id] must be allocated + * before calling iptable_nat_table_init(). + */ + ret = register_pernet_subsys(&iptable_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&iptable_nat_net_ops); - if (ret < 0) { - xt_unregister_template(&nf_nat_ipv4_table); - return ret; - } + ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + if (ret < 0) + unregister_pernet_subsys(&iptable_nat_net_ops); return ret; } static void __exit iptable_nat_exit(void) { - unregister_pernet_subsys(&iptable_nat_net_ops); xt_unregister_template(&nf_nat_ipv4_table); + unregister_pernet_subsys(&iptable_nat_net_ops); } module_init(iptable_nat_init); From 91b6df6611b7edb28676c4f63f90c56c30d3e601 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 25 Jul 2024 12:28:21 -0700 Subject: [PATCH 57/86] netfilter: iptables: Fix potential null-ptr-deref in ip6table_nat_table_init(). [ Upstream commit c22921df777de5606f1047b1345b8d22ef1c0b34 ] ip6table_nat_table_init() accesses net->gen->ptr[ip6table_nat_net_ops.id], but the function is exposed to user space before the entry is allocated via register_pernet_subsys(). Let's call register_pernet_subsys() before xt_register_template(). Fixes: fdacd57c79b7 ("netfilter: x_tables: never register tables by default") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv6/netfilter/ip6table_nat.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bf3cb3a13600..52d597b16b65 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv6_table, - ip6table_nat_table_init); + int ret; + /* net->gen->ptr[ip6table_nat_net_id] must be allocated + * before calling ip6t_nat_register_lookups(). + */ + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&ip6table_nat_net_ops); + ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); if (ret) - xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); return ret; } static void __exit ip6table_nat_exit(void) { - unregister_pernet_subsys(&ip6table_nat_net_ops); xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); } module_init(ip6table_nat_init); From d4122d141f4b23b01acae6bc18c77f359bd2fb33 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 30 Jul 2024 09:16:33 +0300 Subject: [PATCH 58/86] net/mlx5: Lag, don't use the hardcoded value of the first port [ Upstream commit 3fda84dc090390573cfbd0b1d70372663315de21 ] The cited commit didn't change the body of the loop as it should. It shouldn't be using MLX5_LAG_P1. Fixes: 7e978e7714d6 ("net/mlx5: Lag, use actual number of lag ports") Signed-off-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Link: https://patch.msgid.link/20240730061638.1831002-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a283d8ae466b..4b4d76108111 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1483,7 +1483,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + if (ldev->pf[i].netdev == slave) { port = i; break; } From 5d07d1d40aabfd61bab21115639bd4f641db6002 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 30 Jul 2024 09:16:34 +0300 Subject: [PATCH 59/86] net/mlx5: Fix missing lock on sync reset reload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 572f9caa9e7295f8c8822e4122c7ae8f1c412ff9 ] On sync reset reload work, when remote host updates devlink on reload actions performed on that host, it misses taking devlink lock before calling devlink_remote_reload_actions_performed() which results in triggering lock assert like the following: WARNING: CPU: 4 PID: 1164 at net/devlink/core.c:261 devl_assert_locked+0x3e/0x50 … CPU: 4 PID: 1164 Comm: kworker/u96:6 Tainted: G S W 6.10.0-rc2+ #116 Hardware name: Supermicro SYS-2028TP-DECTR/X10DRT-PT, BIOS 2.0 12/18/2015 Workqueue: mlx5_fw_reset_events mlx5_sync_reset_reload_work [mlx5_core] RIP: 0010:devl_assert_locked+0x3e/0x50 … Call Trace: ? __warn+0xa4/0x210 ? devl_assert_locked+0x3e/0x50 ? report_bug+0x160/0x280 ? handle_bug+0x3f/0x80 ? exc_invalid_op+0x17/0x40 ? asm_exc_invalid_op+0x1a/0x20 ? devl_assert_locked+0x3e/0x50 devlink_notify+0x88/0x2b0 ? mlx5_attach_device+0x20c/0x230 [mlx5_core] ? __pfx_devlink_notify+0x10/0x10 ? process_one_work+0x4b6/0xbb0 process_one_work+0x4b6/0xbb0 […] Fixes: 84a433a40d0e ("net/mlx5: Lock mlx5 devlink reload callbacks") Signed-off-by: Moshe Shemesh Reviewed-by: Maor Gottlieb Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Link: https://patch.msgid.link/20240730061638.1831002-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index dec1492da74d..1a818759a9aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -145,6 +145,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct devlink *devlink = priv_to_devlink(dev); /* if this is the driver that initiated the fw reset, devlink completed the reload */ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { @@ -155,9 +156,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else mlx5_load_one(dev, true); - devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + devl_lock(devlink); + devlink_remote_reload_actions_performed(devlink, 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + devl_unlock(devlink); } } From 18b26c732454c116f77fe3527cdeae4fe1f2a459 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Tue, 30 Jul 2024 09:16:37 +0300 Subject: [PATCH 60/86] net/mlx5e: Add a check for the return value from mlx5_port_set_eth_ptys [ Upstream commit 3f8e82a020a5c22f9b791f4ac499b8e18007fbda ] Since the documentation for mlx5_toggle_port_link states that it should only be used after setting the port register, we add a check for the return value from mlx5_port_set_eth_ptys to ensure the register was successfully set before calling it. Fixes: 667daedaecd1 ("net/mlx5e: Toggle link only after modifying port parameters") Signed-off-by: Shahar Shitrit Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Link: https://patch.msgid.link/20240730061638.1831002-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ceeb23f478e1..3ee61987266c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1223,7 +1223,12 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + err = mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + if (err) { + netdev_err(priv->netdev, "%s: failed to set ptys reg: %d\n", __func__, err); + goto out; + } + mlx5_toggle_port_link(mdev); out: From aa0f864052d041773c8c3fed1080c5207a1fba74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 29 Jul 2024 17:17:48 -0700 Subject: [PATCH 61/86] ipv6: fix ndisc_is_useropt() handling for PIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a46c68debf3be3a477a69ccbf0a1d050df841676 ] The current logic only works if the PIO is between two other ND user options. This fixes it so that the PIO can also be either before or after other ND user options (for example the first or last option in the RA). side note: there's actually Android tests verifying a portion of the old broken behaviour, so: https://android-review.googlesource.com/c/kernel/tests/+/3196704 fixes those up. Cc: Jen Linkova Cc: Lorenzo Colitti Cc: Patrick Rohr Cc: David Ahern Cc: YOSHIFUJI Hideaki / 吉藤英明 Cc: Jakub Kicinski Signed-off-by: Maciej Żenczykowski Fixes: 048c796beb6e ("ipv6: adjust ndisc_is_useropt() to also return true for PIO") Link: https://patch.msgid.link/20240730001748.147636-1-maze@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/ndisc.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8c5a99fe6803..cfb4cf6e6654 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { + bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; @@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, break; #endif default: - if (ndisc_is_useropt(dev, nd_opt)) { - ndopts->nd_useropts_end = nd_opt; - if (!ndopts->nd_useropts) - ndopts->nd_useropts = nd_opt; - } else { - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the - * protocol. - */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); - } + unknown = true; + } + if (ndisc_is_useropt(dev, nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else if (unknown) { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK(2, notice, + "%s: ignored unsupported option; type=%d, len=%d\n", + __func__, + nd_opt->nd_opt_type, + nd_opt->nd_opt_len); } next_opt: opt_len -= l; From d7ccf2ca772bfe33e2c53ef80fa20d2d87eb6144 Mon Sep 17 00:00:00 2001 From: Zhe Qiao Date: Wed, 31 Jul 2024 16:45:47 +0800 Subject: [PATCH 62/86] riscv/mm: Add handling for VM_FAULT_SIGSEGV in mm_fault_error() [ Upstream commit 0c710050c47d45eb77b28c271cddefc5c785cb40 ] Handle VM_FAULT_SIGSEGV in the page fault path so that we correctly kill the process and we don't BUG() the kernel. Fixes: 07037db5d479 ("RISC-V: Paging and MMU") Signed-off-by: Zhe Qiao Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240731084547.85380-1-qiaozhe@iscas.ac.cn Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/mm/fault.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 274bc6dd839f..05d7d3647964 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -60,26 +60,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } From 56ddc3233c68f3fb1a6b1753782077d74833e294 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 31 Jul 2024 14:36:01 +0100 Subject: [PATCH 63/86] arm64: jump_label: Ensure patched jump_labels are visible to all CPUs [ Upstream commit cfb00a35786414e7c0e6226b277d9f09657eae74 ] Although the Arm architecture permits concurrent modification and execution of NOP and branch instructions, it still requires some synchronisation to ensure that other CPUs consistently execute the newly written instruction: > When the modified instructions are observable, each PE that is > executing the modified instructions must execute an ISB or perform a > context synchronizing event to ensure execution of the modified > instructions Prior to commit f6cc0c501649 ("arm64: Avoid calling stop_machine() when patching jump labels"), the arm64 jump_label patching machinery performed synchronisation using stop_machine() after each modification, however this was problematic when flipping static keys from atomic contexts (namely, the arm_arch_timer CPU hotplug startup notifier) and so we switched to the _nosync() patching routines to avoid "scheduling while atomic" BUG()s during boot. In hindsight, the analysis of the issue in f6cc0c501649 isn't quite right: it cites the use of IPIs in the default patching routines as the cause of the lockup, whereas stop_machine() does not rely on IPIs and the I-cache invalidation is performed using __flush_icache_range(), which elides the call to kick_all_cpus_sync(). In fact, the blocking wait for other CPUs is what triggers the BUG() and the problem remains even after f6cc0c501649, for example because we could block on the jump_label_mutex. Eventually, the arm_arch_timer driver was fixed to avoid the static key entirely in commit a862fc2254bd ("clocksource/arm_arch_timer: Remove use of workaround static key"). This all leaves the jump_label patching code in a funny situation on arm64 as we do not synchronise with other CPUs to reduce the likelihood of a bug which no longer exists. Consequently, toggling a static key on one CPU cannot be assumed to take effect on other CPUs, leading to potential issues, for example with missing preempt notifiers. Rather than revert f6cc0c501649 and go back to stop_machine() for each patch site, implement arch_jump_label_transform_apply() and kick all the other CPUs with an IPI at the end of patching. Cc: Alexander Potapenko Cc: Mark Rutland Cc: Marc Zyngier Fixes: f6cc0c501649 ("arm64: Avoid calling stop_machine() when patching jump labels") Signed-off-by: Will Deacon Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240731133601.3073-1-will@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/jump_label.h | 1 + arch/arm64/kernel/jump_label.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index b5bd3c38a01b..e714d7770999 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,6 +13,7 @@ #include #include +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE static __always_inline bool arch_static_branch(struct static_key *key, diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index faf88ec9c48e..f63ea915d6ad 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -7,11 +7,12 @@ */ #include #include +#include #include #include -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry, } aarch64_insn_patch_text_nosync(addr, insn); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); } From ed15fdf30736a255c0e4f5d0263e12cf6636fade Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 29 Jul 2024 14:22:49 +0000 Subject: [PATCH 64/86] rust: SHADOW_CALL_STACK is incompatible with Rust commit f126745da81783fb1d082e67bf14c6795e489a88 upstream. When using the shadow call stack sanitizer, all code must be compiled with the -ffixed-x18 flag, but this flag is not currently being passed to Rust. This results in crashes that are extremely difficult to debug. To ensure that nobody else has to go through the same debugging session that I had to, prevent configurations that enable both SHADOW_CALL_STACK and RUST. It is rather common for people to backport 724a75ac9542 ("arm64: rust: Enable Rust support for AArch64"), so I recommend applying this fix all the way back to 6.1. Cc: stable@vger.kernel.org # 6.1 and later Fixes: 724a75ac9542 ("arm64: rust: Enable Rust support for AArch64") Signed-off-by: Alice Ryhl Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240729-shadow-call-stack-v4-1-2a664b082ea4@google.com Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index 537f01eba2e6..4cd3fc82b09e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1924,6 +1924,7 @@ config RUST depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT + depends on !SHADOW_CALL_STACK depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE help Enables Rust support in the kernel. From 1b3777d2f248d6e2a12346431ef6d6cd0e420d5c Mon Sep 17 00:00:00 2001 From: Patryk Duda Date: Tue, 30 Jul 2024 10:44:25 +0000 Subject: [PATCH 65/86] platform/chrome: cros_ec_proto: Lock device when updating MKBP version commit df615907f1bf907260af01ccb904d0e9304b5278 upstream. The cros_ec_get_host_command_version_mask() function requires that the caller must have ec_dev->lock mutex before calling it. This requirement was not met and as a result it was possible that two commands were sent to the device at the same time. The problem was observed while using UART backend which doesn't use any additional locks, unlike SPI backend which locks the controller until response is received. Fixes: f74c7557ed0d ("platform/chrome: cros_ec_proto: Update version on GET_NEXT_EVENT failure") Cc: stable@vger.kernel.org Signed-off-by: Patryk Duda Link: https://lore.kernel.org/r/20240730104425.607083-1-patrykd@google.com Signed-off-by: Tzung-Bi Shih Signed-off-by: Greg Kroah-Hartman --- drivers/platform/chrome/cros_ec_proto.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 475a6dd72db6..809fabef3b44 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -805,9 +805,11 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, if (ret == -ENOPROTOOPT) { dev_dbg(ec_dev->dev, "GET_NEXT_EVENT returned invalid version error.\n"); + mutex_lock(&ec_dev->lock); ret = cros_ec_get_host_command_version_mask(ec_dev, EC_CMD_GET_NEXT_EVENT, &ver_mask); + mutex_unlock(&ec_dev->lock); if (ret < 0 || ver_mask == 0) /* * Do not change the MKBP supported version if we can't From 8bb9cf2edf490e65678f2d57634ac763a778aeb6 Mon Sep 17 00:00:00 2001 From: Tatsunosuke Tobita Date: Tue, 9 Jul 2024 14:57:28 +0900 Subject: [PATCH 66/86] HID: wacom: Modify pen IDs commit f0d17d696dfce77c9abc830e4ac2d677890a2dad upstream. The pen ID, 0x80842, was not the correct ID for wacom driver to treat. The ID was corrected to 0x8842. Also, 0x4200 was not the expected ID used on any Wacom device. Therefore, 0x4200 was removed. Signed-off-by: Tatsunosuke Tobita Signed-off-by: Tatsunosuke Tobita Fixes: bfdc750c4cb2 ("HID: wacom: add three styli to wacom_intuos_get_tool_type") Cc: stable@kernel.org #6.2 Reviewed-by: Ping Cheng Link: https://patch.msgid.link/20240709055729.17158-1-tatsunosuke.wacom@gmail.com Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 53235b276bb2..05e40880e7d4 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -709,13 +709,12 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x8e2: /* IntuosHT2 pen */ case 0x022: case 0x200: /* Pro Pen 3 */ - case 0x04200: /* Pro Pen 3 */ case 0x10842: /* MobileStudio Pro Pro Pen slim */ case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */ case 0x16802: /* Cintiq 13HD Pro Pen */ case 0x18802: /* DTH2242 Pen */ case 0x10802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x80842: /* Intuos Pro and Cintiq Pro 3D Pen */ + case 0x8842: /* Intuos Pro and Cintiq Pro 3D Pen */ tool_type = BTN_TOOL_PEN; break; From 36dac679722e5aa68c9efa822a38a5ea0c107342 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 15 Feb 2023 09:18:02 +0900 Subject: [PATCH 67/86] btrfs: zoned: fix zone_unusable accounting on making block group read-write again commit 8cd44dd1d17a23d5cc8c443c659ca57aa76e2fa5 upstream. When btrfs makes a block group read-only, it adds all free regions in the block group to space_info->bytes_readonly. That free space excludes reserved and pinned regions. OTOH, when btrfs makes the block group read-write again, it moves all the unused regions into the block group's zone_unusable. That unused region includes reserved and pinned regions. As a result, it counts too much zone_unusable bytes. Fortunately (or unfortunately), having erroneous zone_unusable does not affect the calculation of space_info->bytes_readonly, because free space (num_bytes in btrfs_dec_block_group_ro) calculation is done based on the erroneous zone_unusable and it reduces the num_bytes just to cancel the error. This behavior can be easily discovered by adding a WARN_ON to check e.g, "bg->pinned > 0" in btrfs_dec_block_group_ro(), and running fstests test case like btrfs/282. Fix it by properly considering pinned and reserved in btrfs_dec_block_group_ro(). Also, add a WARN_ON and introduce btrfs_space_info_update_bytes_zone_unusable() to catch a similar mistake. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Naohiro Aota Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 13 ++++++++----- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/free-space-cache.c | 4 +++- fs/btrfs/space-info.c | 2 +- fs/btrfs/space-info.h | 1 + include/trace/events/btrfs.h | 8 ++++++++ 6 files changed, 23 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 676978f2e994..e9a0b27e1c4f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1065,8 +1065,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->active_total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1250,7 +1250,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -2812,9 +2813,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0d2cc186974d..528cd88a77fd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2731,7 +2731,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 862a222caab3..76d52d682b3b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2702,8 +2702,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; + WARN_ON(block_group->zone_unusable > block_group->length); + } spin_unlock(&ctl->tree_lock); if (!used) { spin_lock(&block_group->lock); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 8b75f436a9a3..bede72f3dffc 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -311,7 +311,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index ce66023a9eb8..99ce3225dd59 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -121,6 +121,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 5e10b5b1d16c..7a6c5a870d33 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2322,6 +2322,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, From 5db999fff545b924b24c9afd368ef5c17279b176 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Aug 2024 15:22:22 -0400 Subject: [PATCH 68/86] protect the fetch of ->fd[fd] in do_dup2() from mispredictions commit 8aa37bde1a7b645816cda8b80df4753ecf172bf1 upstream. both callers have verified that fd is not greater than ->max_fds; however, misprediction might end up with tofree = fdt->fd[fd]; being speculatively executed. That's wrong for the same reasons why it's wrong in close_fd()/file_close_fd_locked(); the same solution applies - array_index_nospec(fd, fdt->max_fds) could differ from fd only in case of speculative execution on mispredicted path. Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/file.c b/fs/file.c index 69386c2e37c5..82c5d2382082 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1122,6 +1122,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; From 47ab33e1d6a796a82f9b7a70ed95c9649e92d7d2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 27 Jul 2024 12:01:23 +0200 Subject: [PATCH 69/86] mptcp: sched: check both directions for backup commit b6a66e521a2032f7fcba2af5a9bcbaeaa19b7ca3 upstream. The 'mptcp_subflow_context' structure has two items related to the backup flags: - 'backup': the subflow has been marked as backup by the other peer - 'request_bkup': the backup flag has been set by the host Before this patch, the scheduler was only looking at the 'backup' flag. That can make sense in some cases, but it looks like that's not what we wanted for the general use, because either the path-manager was setting both of them when sending an MP_PRIO, or the receiver was duplicating the 'backup' flag in the subflow request. Note that the use of these two flags in the path-manager are going to be fixed in the next commits, but this change here is needed not to modify the behaviour. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- include/trace/events/mptcp.h | 2 +- net/mptcp/protocol.c | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 563e48617374..54e8fb5a229c 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d6f3e1b9e844..19f9c1700661 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1491,13 +1491,15 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } mptcp_for_each_subflow(msk, subflow) { + bool backup = subflow->backup || subflow->request_bkup; + trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; tout = max(tout, mptcp_timeout_from_subflow(subflow)); - nr_active += !subflow->backup; + nr_active += !backup; pace = subflow->avg_pacing_rate; if (unlikely(!pace)) { /* init pacing rate from socket */ @@ -1508,9 +1510,9 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); - if (linger_time < send_info[subflow->backup].linger_time) { - send_info[subflow->backup].ssk = ssk; - send_info[subflow->backup].linger_time = linger_time; + if (linger_time < send_info[backup].linger_time) { + send_info[backup].ssk = ssk; + send_info[backup].linger_time = linger_time; } } __mptcp_set_timeout(sk, tout); From 584e9aa47ea54ec3d182f88df8a97d4439708173 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 31 Jul 2024 16:19:41 +0200 Subject: [PATCH 70/86] ALSA: usb-audio: Correct surround channels in UAC1 channel map commit b7b7e1ab7619deb3b299b5e5c619c3e6f183a12d upstream. USB-audio driver puts SNDRV_CHMAP_SL and _SR as left and right surround channels for UAC1 channel map, respectively. But they should have been SNDRV_CHMAP_RL and _RR; the current value *_SL and _SR are rather "side" channels, not "surround". I guess I took those mistakenly when I read the spec mentioning "surround left". This patch corrects those entries to be the right channels. Suggested-by: Sylvain BERTRAND Closes: https://lore.kernel.orgZ/qIyJD8lhd8hFhlC@freedom Fixes: 04324ccc75f9 ("ALSA: usb-audio: add channel map support") Cc: Link: https://patch.msgid.link/20240731142018.24750-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index d5409f387945..e14c725acebf 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -244,8 +244,8 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ - SNDRV_CHMAP_SL, /* left surround */ - SNDRV_CHMAP_SR, /* right surround */ + SNDRV_CHMAP_RL, /* left surround */ + SNDRV_CHMAP_RR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ From fec031e89d2d818ba78737708ab58ffc647f8b66 Mon Sep 17 00:00:00 2001 From: Mavroudis Chatzilazaridis Date: Sun, 28 Jul 2024 12:36:04 +0000 Subject: [PATCH 71/86] ALSA: hda/realtek: Add quirk for Acer Aspire E5-574G commit 3c0b6f924e1259ade38587ea719b693f6f6f2f3e upstream. ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST fixes combo jack detection and limits the internal microphone boost that causes clipping on this model. Signed-off-by: Mavroudis Chatzilazaridis Cc: Link: https://patch.msgid.link/20240728123601.144017-1-mavchatz@protonmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e0df44bfda4e..edfcd38175d2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9496,6 +9496,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), From a7cdecede82e64d65da150c7079f84d5e87cda97 Mon Sep 17 00:00:00 2001 From: Edmund Raile Date: Tue, 30 Jul 2024 19:53:26 +0000 Subject: [PATCH 72/86] Revert "ALSA: firewire-lib: obsolete workqueue for period update" commit 6ccf9984d6be3c2f804087b736db05c2ec42664b upstream. prepare resolution of AB/BA deadlock competition for substream lock: restore workqueue previously used for process context: revert commit b5b519965c4c ("ALSA: firewire-lib: obsolete workqueue for period update") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/kwryofzdmjvzkuw6j3clftsxmoolynljztxqwg76hzeo4simnl@jn3eo7pe642q/ Signed-off-by: Edmund Raile Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240730195318.869840-2-edmund.raile@protonmail.com Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream.c | 15 +++++++++++++++ sound/firewire/amdtp-stream.h | 1 + 2 files changed, 16 insertions(+) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 875312568369..ce8349c13441 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -77,6 +77,8 @@ // overrun. Actual device can skip more, then this module stops the packet streaming. #define IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES 5 +static void pcm_period_work(struct work_struct *work); + /** * amdtp_stream_init - initialize an AMDTP stream structure * @s: the AMDTP stream to initialize @@ -105,6 +107,7 @@ int amdtp_stream_init(struct amdtp_stream *s, struct fw_unit *unit, s->flags = flags; s->context = ERR_PTR(-1); mutex_init(&s->mutex); + INIT_WORK(&s->period_work, pcm_period_work); s->packet_index = 0; init_waitqueue_head(&s->ready_wait); @@ -343,6 +346,7 @@ EXPORT_SYMBOL(amdtp_stream_get_max_payload); */ void amdtp_stream_pcm_prepare(struct amdtp_stream *s) { + cancel_work_sync(&s->period_work); s->pcm_buffer_pointer = 0; s->pcm_period_pointer = 0; } @@ -622,6 +626,16 @@ static void update_pcm_pointers(struct amdtp_stream *s, } } +static void pcm_period_work(struct work_struct *work) +{ + struct amdtp_stream *s = container_of(work, struct amdtp_stream, + period_work); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); + + if (pcm) + snd_pcm_period_elapsed(pcm); +} + static int queue_packet(struct amdtp_stream *s, struct fw_iso_packet *params, bool sched_irq) { @@ -1798,6 +1812,7 @@ static void amdtp_stream_stop(struct amdtp_stream *s) return; } + cancel_work_sync(&s->period_work); fw_iso_context_stop(s->context); fw_iso_context_destroy(s->context); s->context = ERR_PTR(-1); diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index cf9ab347277f..011d0f0c3941 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -190,6 +190,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; + struct work_struct period_work; snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; From b239a37d68e8bc59f9516444da222841e3b13ba9 Mon Sep 17 00:00:00 2001 From: Edmund Raile Date: Tue, 30 Jul 2024 19:53:29 +0000 Subject: [PATCH 73/86] Revert "ALSA: firewire-lib: operate for period elapse event in process context" commit 3dab73ab925a51ab05543b491bf17463a48ca323 upstream. Commit 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period elapse event in process context") removed the process context workqueue from amdtp_domain_stream_pcm_pointer() and update_pcm_pointers() to remove its overhead. With RME Fireface 800, this lead to a regression since Kernels 5.14.0, causing an AB/BA deadlock competition for the substream lock with eventual system freeze under ALSA operation: thread 0: * (lock A) acquire substream lock by snd_pcm_stream_lock_irq() in snd_pcm_status64() * (lock B) wait for tasklet to finish by calling tasklet_unlock_spin_wait() in tasklet_disable_in_atomic() in ohci_flush_iso_completions() of ohci.c thread 1: * (lock B) enter tasklet * (lock A) attempt to acquire substream lock, waiting for it to be released: snd_pcm_stream_lock_irqsave() in snd_pcm_period_elapsed() in update_pcm_pointers() in process_ctx_payloads() in process_rx_packets() of amdtp-stream.c ? tasklet_unlock_spin_wait ohci_flush_iso_completions firewire_ohci amdtp_domain_stream_pcm_pointer snd_firewire_lib snd_pcm_update_hw_ptr0 snd_pcm snd_pcm_status64 snd_pcm ? native_queued_spin_lock_slowpath _raw_spin_lock_irqsave snd_pcm_period_elapsed snd_pcm process_rx_packets snd_firewire_lib irq_target_callback snd_firewire_lib handle_it_packet firewire_ohci context_tasklet firewire_ohci Restore the process context work queue to prevent deadlock AB/BA deadlock competition for ALSA substream lock of snd_pcm_stream_lock_irq() in snd_pcm_status64() and snd_pcm_stream_lock_irqsave() in snd_pcm_period_elapsed(). revert commit 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period elapse event in process context") Replace inline description to prevent future deadlock. Cc: stable@vger.kernel.org Fixes: 7ba5ca32fe6e ("ALSA: firewire-lib: operate for period elapse event in process context") Reported-by: edmund.raile Closes: https://lore.kernel.org/r/kwryofzdmjvzkuw6j3clftsxmoolynljztxqwg76hzeo4simnl@jn3eo7pe642q/ Signed-off-by: Edmund Raile Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240730195318.869840-3-edmund.raile@protonmail.com Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp-stream.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index ce8349c13441..842fe127c537 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -613,16 +613,8 @@ static void update_pcm_pointers(struct amdtp_stream *s, // The program in user process should periodically check the status of intermediate // buffer associated to PCM substream to process PCM frames in the buffer, instead // of receiving notification of period elapsed by poll wait. - if (!pcm->runtime->no_period_wakeup) { - if (in_softirq()) { - // In software IRQ context for 1394 OHCI. - snd_pcm_period_elapsed(pcm); - } else { - // In process context of ALSA PCM application under acquired lock of - // PCM substream. - snd_pcm_period_elapsed_under_stream_lock(pcm); - } - } + if (!pcm->runtime->no_period_wakeup) + queue_work(system_highpri_wq, &s->period_work); } } @@ -1752,11 +1744,14 @@ unsigned long amdtp_domain_stream_pcm_pointer(struct amdtp_domain *d, { struct amdtp_stream *irq_target = d->irq_target; - // Process isochronous packets queued till recent isochronous cycle to handle PCM frames. if (irq_target && amdtp_stream_running(irq_target)) { - // In software IRQ context, the call causes dead-lock to disable the tasklet - // synchronously. - if (!in_softirq()) + // use wq to prevent AB/BA deadlock competition for + // substream lock: + // fw_iso_context_flush_completions() acquires + // lock by ohci_flush_iso_completions(), + // amdtp-stream process_rx_packets() attempts to + // acquire same lock by snd_pcm_elapsed() + if (current_work() != &s->period_work) fw_iso_context_flush_completions(irq_target->context); } From 3b933b16c996af8adb6bc1b5748a63dfb41a82bc Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 22 Jul 2024 14:41:13 -0400 Subject: [PATCH 74/86] drm/vmwgfx: Fix a deadlock in dma buf fence polling commit e58337100721f3cc0c7424a18730e4f39844934f upstream. Introduce a version of the fence ops that on release doesn't remove the fence from the pending list, and thus doesn't require a lock to fix poll->fence wait->fence unref deadlocks. vmwgfx overwrites the wait callback to iterate over the list of all fences and update their status, to do that it holds a lock to prevent the list modifcations from other threads. The fence destroy callback both deletes the fence and removes it from the list of pending fences, for which it holds a lock. dma buf polling cb unrefs a fence after it's been signaled: so the poll calls the wait, which signals the fences, which are being destroyed. The destruction tries to acquire the lock on the pending fences list which it can never get because it's held by the wait from which it was called. Old bug, but not a lot of userspace apps were using dma-buf polling interfaces. Fix those, in particular this fixes KDE stalls/deadlock. Signed-off-by: Zack Rusin Fixes: 2298e804e96e ("drm/vmwgfx: rework to new fence interface, v2") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v6.2+ Reviewed-by: Maaz Mombasawala Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240722184313.181318-2-zack.rusin@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 95344735d00e..add39769283f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -32,7 +32,6 @@ #define VMW_FENCE_WRAP (1 << 31) struct vmw_fence_manager { - int num_fence_objects; struct vmw_private *dev_priv; spinlock_t lock; struct list_head fence_list; @@ -124,13 +123,13 @@ static void vmw_fence_obj_destroy(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); - struct vmw_fence_manager *fman = fman_from_fence(fence); - spin_lock(&fman->lock); - list_del_init(&fence->head); - --fman->num_fence_objects; - spin_unlock(&fman->lock); + if (!list_empty(&fence->head)) { + spin_lock(&fman->lock); + list_del_init(&fence->head); + spin_unlock(&fman->lock); + } fence->destroy(fence); } @@ -257,7 +256,6 @@ static const struct dma_fence_ops vmw_fence_ops = { .release = vmw_fence_obj_destroy, }; - /* * Execute signal actions on fences recently signaled. * This is done from a workqueue so we don't have to execute @@ -355,7 +353,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, goto out_unlock; } list_add_tail(&fence->head, &fman->fence_list); - ++fman->num_fence_objects; out_unlock: spin_unlock(&fman->lock); @@ -403,7 +400,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, u32 passed_seqno) { u32 goal_seqno; - struct vmw_fence_obj *fence; + struct vmw_fence_obj *fence, *next_fence; if (likely(!fman->seqno_valid)) return false; @@ -413,7 +410,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, return false; fman->seqno_valid = false; - list_for_each_entry(fence, &fman->fence_list, head) { + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; vmw_fence_goal_write(fman->dev_priv, From 5670466033d14329aaa87e726a481a6c56892eff Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 29 Jul 2024 10:40:35 -0700 Subject: [PATCH 75/86] drm/i915: Fix possible int overflow in skl_ddi_calculate_wrpll() commit 5b511572660190db1dc8ba412efd0be0d3781ab6 upstream. On the off chance that clock value ends up being too high (by means of skl_ddi_calculate_wrpll() having been called with big enough value of crtc_state->port_clock * 1000), one possible consequence may be that the result will not be able to fit into signed int. Fix this issue by moving conversion of clock parameter from kHz to Hz into the body of skl_ddi_calculate_wrpll(), as well as casting the same parameter to u64 type while calculating the value for AFE clock. This both mitigates the overflow problem and avoids possible erroneous integer promotion mishaps. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 82d354370189 ("drm/i915/skl: Implementation of SKL DPLL programming") Cc: stable@vger.kernel.org Signed-off-by: Nikita Zhandarovich Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240729174035.25727-1-n.zhandarovich@fintech.ru (cherry picked from commit 833cf12846aa19adf9b76bc79c40747726f3c0c1) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 64dd603dc69a..ec0ef3ff9e6a 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1552,7 +1552,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params, } static int -skl_ddi_calculate_wrpll(int clock /* in Hz */, +skl_ddi_calculate_wrpll(int clock, int ref_clock, struct skl_wrpll_params *wrpll_params) { @@ -1577,7 +1577,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, }; unsigned int dco, d, i; unsigned int p0, p1, p2; - u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */ for (d = 0; d < ARRAY_SIZE(dividers); d++) { for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { @@ -1709,7 +1709,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) ctrl1 |= DPLL_CTRL1_HDMI_MODE(0); - ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000, + ret = skl_ddi_calculate_wrpll(crtc_state->port_clock, i915->display.dpll.ref_clks.nssc, &wrpll_params); if (ret) return ret; From 52977968f377d422186544effdef666db449200b Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 25 Jul 2024 10:29:42 +0800 Subject: [PATCH 76/86] net: usb: sr9700: fix uninitialized variable use in sr_mdio_read commit 08f3a5c38087d1569e982a121aad1e6acbf145ce upstream. It could lead to error happen because the variable res is not updated if the call to sr_share_read_word returns an error. In this particular case error code was returned and res stayed uninitialized. Same issue also applies to sr_read_reg. This can be avoided by checking the return value of sr_share_read_word and sr_read_reg, and propagating the error if the read operation failed. Found by code review. Cc: stable@vger.kernel.org Fixes: c9b37458e956 ("USB2NET : SR9700 : One chip USB 1.1 USB2NET SR9700Device Driver Support") Signed-off-by: Ma Ke Reviewed-by: Shigeru Yoshida Reviewed-by: Hariprasad Kelam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/sr9700.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 0a662e42ed96..cb7d2f798fb4 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -179,6 +179,7 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; int rc = 0; + int err; if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); @@ -189,11 +190,17 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) if (loc == MII_BMSR) { u8 value; - sr_read_reg(dev, SR_NSR, &value); + err = sr_read_reg(dev, SR_NSR, &value); + if (err < 0) + return err; + if (value & NSR_LINKST) rc = 1; } - sr_share_read_word(dev, 1, loc, &res); + err = sr_share_read_word(dev, 1, loc, &res); + if (err < 0) + return err; + if (rc == 1) res = le16_to_cpu(res) | BMSR_LSTATUS; else From f3d0261d910e0f027241bc5c49c68a8997e9f777 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 30 Jul 2024 21:51:52 +0200 Subject: [PATCH 77/86] r8169: don't increment tx_dropped in case of NETDEV_TX_BUSY commit d516b187a9cc2e842030dd005be2735db3e8f395 upstream. The skb isn't consumed in case of NETDEV_TX_BUSY, therefore don't increment the tx_dropped counter. Fixes: 188f4af04618 ("r8169: use NETDEV_TX_{BUSY/OK}") Cc: stable@vger.kernel.org Suggested-by: Jakub Kicinski Signed-off-by: Heiner Kallweit Reviewed-by: Wojciech Drewek Link: https://patch.msgid.link/bbba9c48-8bac-4932-9aa1-d2ed63bc9433@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f83bd15f9e99..b187371fa2f0 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4273,7 +4273,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - goto err_stop_0; + netif_stop_queue(dev); + return NETDEV_TX_BUSY; } opts[1] = rtl8169_tx_vlan_tag(skb); @@ -4346,11 +4347,6 @@ err_dma_0: dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; - -err_stop_0: - netif_stop_queue(dev); - dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; } static unsigned int rtl_last_frag_len(struct sk_buff *skb) From 991b26e1109a5163ba7fe6df8e99787af019d406 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 27 Jul 2024 11:03:59 +0200 Subject: [PATCH 78/86] mptcp: fix user-space PM announced address accounting commit 167b93258d1e2230ee3e8a97669b4db4cc9e90aa upstream. Currently the per-connection announced address counter is never decreased. When the user-space PM is in use, this just affect the information exposed via diag/sockopt, but it could still foul the PM to wrong decision. Add the missing accounting for the user-space PM's sake. Fixes: 8b1c94da1e48 ("mptcp: only send RM_ADDR in nl_cmd_remove") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3e2cbf0e6ce9..83610a689bbc 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1578,16 +1578,25 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; + int anno_nr = 0; list_for_each_entry(entry, rm_list, list) { - if ((remove_anno_list_by_saddr(msk, &entry->addr) || - lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && - alist.nr < MPTCP_RM_IDS_MAX) - alist.ids[alist.nr++] = entry->addr.id; + if (alist.nr >= MPTCP_RM_IDS_MAX) + break; + + /* only delete if either announced or matching a subflow */ + if (remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!lookup_subflow_by_saddr(&msk->conn_list, + &entry->addr)) + continue; + + alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } From 09176f80995105c62939728fbad5c437d61e7ff4 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 27 Jul 2024 12:01:24 +0200 Subject: [PATCH 79/86] mptcp: distinguish rcv vs sent backup flag in requests commit efd340bf3d7779a3a8ec954d8ec0fb8a10f24982 upstream. When sending an MP_JOIN + SYN + ACK, it is possible to mark the subflow as 'backup' by setting the flag with the same name. Before this patch, the backup was set if the other peer set it in its MP_JOIN + SYN request. It is not correct: the backup flag should be set in the MPJ+SYN+ACK only if the host asks for it, and not mirroring what was done by the other peer. It is then required to have a dedicated bit for each direction, similar to what is done in the subflow context. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/mptcp/options.c | 2 +- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index a718ebcb5bc6..daf53d685b5f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -904,7 +904,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; - opts->backup = subflow_req->backup; + opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index eaed858c0ff9..a2b85eebb620 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -400,6 +400,7 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, + request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index f1d422396b28..77f70ba40a45 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1876,6 +1876,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; + new_ctx->request_bkup = subflow_req->request_bkup; WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; From 6d9719312170a9f3452a0c7588725533d2a06f3f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 27 Jul 2024 11:04:00 +0200 Subject: [PATCH 80/86] mptcp: fix NL PM announced address accounting commit 4b317e0eb287bd30a1b329513531157c25e8b692 upstream. Currently the per connection announced address counter is never decreased. As a consequence, after connection establishment, if the NL PM deletes an endpoint and adds a new/different one, no additional subflow is created for the new endpoint even if the current limits allow that. Address the issue properly updating the signaled address counter every time the NL PM removes such addresses. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 83610a689bbc..0d459b82deb9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1445,6 +1445,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= ret; mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } @@ -1609,17 +1610,18 @@ void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && - slist.nr < MPTCP_RM_IDS_MAX) + if (slist.nr < MPTCP_RM_IDS_MAX && + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = entry->addr.id; - if (remove_anno_list_by_saddr(msk, &entry->addr) && - alist.nr < MPTCP_RM_IDS_MAX) + if (alist.nr < MPTCP_RM_IDS_MAX && + remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } From 882bbd872f8d91c6f886158bd52e280deca96c64 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jul 2024 12:10:14 +0200 Subject: [PATCH 81/86] mptcp: fix bad RCVPRUNED mib accounting commit 0a567c2a10033bf04ed618368d179bce6977984b upstream. Since its introduction, the mentioned MIB accounted for the wrong event: wake-up being skipped as not-needed on some edge condition instead of incoming skb being dropped after landing in the (subflow) receive queue. Move the increment in the correct location. Fixes: ce599c516386 ("mptcp: properly account bulk freed memory") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 19f9c1700661..75ae91c93129 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -363,8 +363,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) + if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; + } has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -851,10 +853,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) sk_rbuf = ssk_rbuf; /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); + if (__mptcp_rmem(sk) > sk_rbuf) return; - } /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); From 00f283a7097ef0ed32d269ae363f6fb728b511c1 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 27 Jul 2024 12:01:25 +0200 Subject: [PATCH 82/86] mptcp: pm: only set request_bkup flag when sending MP_PRIO commit 4258b94831bb7ff28ab80e3c8d94db37db930728 upstream. The 'backup' flag from mptcp_subflow_context structure is supposed to be set only when the other peer flagged a subflow as backup, not the opposite. Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 0d459b82deb9..e9dff6382581 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -481,7 +481,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con msk->last_snd = NULL; subflow->send_mp_prio = 1; - subflow->backup = backup; subflow->request_bkup = backup; } From 1e161339f558d6c454ee75f40c924a17dac12df1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jul 2024 12:10:15 +0200 Subject: [PATCH 83/86] mptcp: fix duplicate data handling commit 68cc924729ffcfe90d0383177192030a9aeb2ee4 upstream. When a subflow receives and discards duplicate data, the mptcp stack assumes that the consumed offset inside the current skb is zero. With multiple subflows receiving data simultaneously such assertion does not held true. As a result the subflow-level copied_seq will be incorrectly increased and later on the same subflow will observe a bad mapping, leading to subflow reset. Address the issue taking into account the skb consumed offset in mptcp_subflow_discard_data(). Fixes: 04e4cd4f7ca4 ("mptcp: cleanup mptcp_subflow_discard_data()") Cc: stable@vger.kernel.org Link: https://github.com/multipath-tcp/mptcp_net-next/issues/501 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/mptcp/subflow.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 77f70ba40a45..96bdd4119578 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1103,14 +1103,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - u32 incr; + struct tcp_sock *tp = tcp_sk(ssk); + u32 offset, incr, avail_len; - incr = limit >= skb->len ? skb->len + fin : limit; + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; + if (WARN_ON_ONCE(offset > skb->len)) + goto out; - pr_debug("discarding=%d len=%d seq=%d", incr, skb->len, - subflow->map_subflow_seq); + avail_len = skb->len - offset; + incr = limit >= avail_len ? avail_len + fin : limit; + + pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; + +out: if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq)) sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) From 33e0f0e51e39f57cc6455977b5491888e3041ed6 Mon Sep 17 00:00:00 2001 From: Liu Jing Date: Sat, 27 Jul 2024 11:04:03 +0200 Subject: [PATCH 84/86] selftests: mptcp: always close input's FD if opened commit 7c70bcc2a84cf925f655ea1ac4b8088062b144a3 upstream. In main_loop_s function, when the open(cfg_input, O_RDONLY) function is run, the last fd is not closed if the "--cfg_repeat > 0" branch is not taken. Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests") Cc: stable@vger.kernel.org Signed-off-by: Liu Jing Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index e6b514cb7bdd..b6b9f41dbc29 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1040,11 +1040,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } From 4440ef0f583740136420915819887c20b0b9b622 Mon Sep 17 00:00:00 2001 From: Alexander Maltsev Date: Wed, 17 Apr 2024 18:51:41 +0500 Subject: [PATCH 85/86] netfilter: ipset: Add list flush to cancel_gc [ Upstream commit c1193d9bbbd379defe9be3c6de566de684de8a6f ] Flushing list in cancel_gc drops references to other lists right away, without waiting for RCU to destroy list. Fixes race when referenced ipsets can't be destroyed while referring list is scheduled for destroy. Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation") Signed-off-by: Alexander Maltsev Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_list_set.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e839c356bcb5..902ff2f3bc72 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -547,6 +547,9 @@ list_set_cancel_gc(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); + + /* Flush list to drop references to other ipsets */ + list_set_flush(set); } static const struct ip_set_type_variant set_variant = { From 36790ef5e00b69ccb92817f95ba1928eea24eebb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 11 Aug 2024 12:36:02 +0200 Subject: [PATCH 86/86] Linux 6.1.104 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20240807150039.247123516@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20240808091131.014292134@linuxfoundation.org Tested-by: Miguel Ojeda Tested-by: ChromeOS CQ Test Tested-by: Pavel Machek (CIP) Tested-by: Linux Kernel Functional Testing Tested-by: Peter Schneider  Tested-by: Jon Hunter Tested-by: kernelci.org bot Tested-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 97149e46565a..0dd963d6d8d2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 103 +SUBLEVEL = 104 EXTRAVERSION = NAME = Curry Ramen