From 1c4f8fb026acba080e8aa98082ca636ef7eb671b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Mar 2025 15:23:54 -0800 Subject: [PATCH 01/44] UPSTREAM: net_sched: Prevent creation of classes with TC_H_ROOT [ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ] The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination condition when traversing up the qdisc tree to update parent backlog counters. However, if a class is created with classid TC_H_ROOT, the traversal terminates prematurely at this class instead of reaching the actual root qdisc, causing parent statistics to be incorrectly maintained. In case of DRR, this could lead to a crash as reported by Mingi Cho. Prevent the creation of any Qdisc class with classid TC_H_ROOT (0xFFFFFFFF) across all qdisc types, as suggested by Jamal. Bug: 403920173 Reported-by: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 78533c4a29ac3aeddce4b481770beaaa4f3bfb67) Signed-off-by: Lee Jones Change-Id: Ieac912ddc0bc44e999fe0d29ddf3a3842abdfa14 From 81ea45b132e6bcf969ce8005ed32e7b4bf89697d Mon Sep 17 00:00:00 2001 From: Bosser Ye Date: Sun, 27 Apr 2025 10:51:29 +0800 Subject: [PATCH 02/44] ANDROID: GKI: Update symbol list for mtk 1 function symbol(s) added 'bool usb_check_int_endpoints(const struct usb_interface*, const u8*)' Bug: 414032152 Change-Id: I74e2af13e5fcc7acd0ff060552f99485f5dda9f8 Signed-off-by: Bosser Ye --- android/abi_gki_aarch64.stg | 10 ++++++++++ android/abi_gki_aarch64_mtk | 1 + 2 files changed, 11 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index b50c2f469354..6c85b138870b 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -411986,6 +411986,15 @@ elf_symbol { type_id: 
0xf38427c4 full_name: "usb_check_bulk_endpoints" } +elf_symbol { + id: 0xcf4d7b06 + name: "usb_check_int_endpoints" + is_defined: true + symbol_type: FUNCTION + crc: 0x18bb04b2 + type_id: 0xf38427c4 + full_name: "usb_check_int_endpoints" +} elf_symbol { id: 0x23a5ab99 name: "usb_choose_configuration" @@ -426159,6 +426168,7 @@ interface { symbol_id: 0x3d66dcb8 symbol_id: 0x1f68a496 symbol_id: 0x12289dad + symbol_id: 0xcf4d7b06 symbol_id: 0x23a5ab99 symbol_id: 0x2a589f64 symbol_id: 0x7da41bc7 diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk index 26727ae17c50..9bf16e696442 100644 --- a/android/abi_gki_aarch64_mtk +++ b/android/abi_gki_aarch64_mtk @@ -3229,6 +3229,7 @@ usb_autopm_put_interface usb_autopm_put_interface_async usb_check_bulk_endpoints + usb_check_int_endpoints usb_clear_halt usb_composite_probe usb_composite_unregister From 242f90b45e895bb0721721f486e2497054eeb0c1 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Sun, 26 Mar 2023 09:38:13 -0700 Subject: [PATCH 03/44] UPSTREAM: drm/msm: Rename drm_msm_gem_submit_reloc::or in C++ code Clashes with C++ `or` keyword Signed-off-by: Danylo Piliaiev Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/528751/ Link: https://lore.kernel.org/r/20230326163813.535762-1-robdclark@gmail.com Signed-off-by: Dmitry Baryshkov Bug: 409896277 Change-Id: Ib8894a7facce6e4f80d586575b3cffaf53a22a1e (cherry picked from commit f1af066bcfd38daa9eee7195ef772dadaaa18520) Signed-off-by: Mahadevan --- include/uapi/drm/msm_drm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 3c7b097c4e3d..16f2eb4b65ee 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -180,7 +180,11 @@ struct drm_msm_gem_cpu_fini { */ struct drm_msm_gem_submit_reloc { __u32 submit_offset; /* in, offset from submit_bo */ +#ifdef __cplusplus + __u32 _or; /* in, value OR'd with result */ +#else __u32 
or; /* in, value OR'd with result */ +#endif __s32 shift; /* in, amount of left shift (can be negative) */ __u32 reloc_idx; /* in, index of reloc_bo buffer */ __u64 reloc_offset; /* in, offset from start of reloc_bo */ From 0c1a07d9c284e99188696b78ddd6faae7b402ae6 Mon Sep 17 00:00:00 2001 From: Seiya Wang Date: Wed, 30 Apr 2025 16:22:12 +0800 Subject: [PATCH 04/44] ANDROID: GKI: Update the symbol list for mtk 1 function symbol(s) added 'ssize_t hdmi_audio_infoframe_pack_for_dp(const struct hdmi_audio_infoframe*, struct dp_sdp*, u8)' Bug: 414724747 Change-Id: I15c9372703d94a787df4e6af83551ca183d4fae9 Signed-off-by: Seiya Wang --- android/abi_gki_aarch64.stg | 88 +++++++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_mtk | 1 + 2 files changed, 89 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 6c85b138870b..1ec9533fbf5c 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -24638,6 +24638,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xe5b47ef3 } +pointer_reference { + id: 0x340055b2 + kind: POINTER + pointee_type_id: 0xfa41b054 +} pointer_reference { id: 0x34016e82 kind: POINTER @@ -26753,6 +26758,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xca69055f } +pointer_reference { + id: 0x380af085 + kind: POINTER + pointee_type_id: 0xca6b248b +} pointer_reference { id: 0x380c33d1 kind: POINTER @@ -35938,6 +35948,11 @@ qualified { qualifier: CONST qualified_type_id: 0x98e32fbd } +qualified { + id: 0xfa41b054 + qualifier: CONST + qualified_type_id: 0x994177db +} qualified { id: 0xfa455c97 qualifier: CONST @@ -41974,6 +41989,29 @@ member { type_id: 0x92233392 offset: 1024 } +member { + id: 0x9583c91c + name: "HB0" + type_id: 0x295c7202 +} +member { + id: 0xd444664c + name: "HB1" + type_id: 0x295c7202 + offset: 8 +} +member { + id: 0x1603fa52 + name: "HB2" + type_id: 0x295c7202 + offset: 16 +} +member { + id: 0x56c4b67f + name: "HB3" + type_id: 0x295c7202 + offset: 24 +} member { 
id: 0xc31a401f name: "InOctetsDecrypted" @@ -73029,6 +73067,12 @@ member { type_id: 0x295c7202 offset: 112 } +member { + id: 0xd75b166e + name: "db" + type_id: 0x5e9b9471 + offset: 32 +} member { id: 0xf540eeb1 name: "db_off" @@ -176291,6 +176335,11 @@ member { type_id: 0xc9082b19 offset: 10176 } +member { + id: 0x0fed75b2 + name: "sdp_header" + type_id: 0x90206a5a +} member { id: 0x682990f8 name: "sdp_max" @@ -230244,6 +230293,28 @@ struct_union { member_id: 0x24d1edc6 } } +struct_union { + id: 0xca6b248b + kind: STRUCT + name: "dp_sdp" + definition { + bytesize: 36 + member_id: 0x0fed75b2 + member_id: 0xd75b166e + } +} +struct_union { + id: 0x90206a5a + kind: STRUCT + name: "dp_sdp_header" + definition { + bytesize: 4 + member_id: 0x9583c91c + member_id: 0xd444664c + member_id: 0x1603fa52 + member_id: 0x56c4b67f + } +} struct_union { id: 0x33fed362 kind: STRUCT @@ -301796,6 +301867,13 @@ function { parameter_id: 0x25653b02 parameter_id: 0x35d17e4b } +function { + id: 0x191b8833 + return_type_id: 0xd5cc9c9a + parameter_id: 0x340055b2 + parameter_id: 0x380af085 + parameter_id: 0x295c7202 +} function { id: 0x191d871c return_type_id: 0x48b5725f @@ -378871,6 +378949,15 @@ elf_symbol { type_id: 0x1f9e005b full_name: "hdmi_audio_infoframe_pack" } +elf_symbol { + id: 0x6392ceb7 + name: "hdmi_audio_infoframe_pack_for_dp" + is_defined: true + symbol_type: FUNCTION + crc: 0x7e0b255f + type_id: 0x191b8833 + full_name: "hdmi_audio_infoframe_pack_for_dp" +} elf_symbol { id: 0x0e1d2fa4 name: "hdmi_avi_infoframe_check" @@ -422488,6 +422575,7 @@ interface { symbol_id: 0x3163ad8e symbol_id: 0x78d85567 symbol_id: 0xcd9a2048 + symbol_id: 0x6392ceb7 symbol_id: 0x0e1d2fa4 symbol_id: 0x306e3b3d symbol_id: 0x684435da diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk index 9bf16e696442..238637837c11 100644 --- a/android/abi_gki_aarch64_mtk +++ b/android/abi_gki_aarch64_mtk @@ -1155,6 +1155,7 @@ handle_sysrq have_governor_per_policy hci_cmd_sync_status + 
hdmi_audio_infoframe_pack_for_dp hex2bin hex_asc hex_asc_upper From 7b89b57429c2ec59cc879a864d6c5480ef6f3606 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 11 Mar 2025 23:16:10 -0700 Subject: [PATCH 05/44] ANDROID: mm/memfd-ashmem-shim: Simplify buffer name retrieval The current way of getting the name for a buffer always requires a buffer to be allocated for the name to be copied into. This is inefficient, as names for shmem buffers are always stored in the same field, and they do not change. Therefore, simplify the name retrieval to just read the buffer name from the field it is always stored in for shmem buffers. This also aligns the code to what is present on the android16-6.12 branch. Bug: 401214613 Bug: 111903542 Change-Id: Idd7b2d16601c890b78bd5705c92842bee470e75c Signed-off-by: Isaac J. Manjarres --- mm/memfd-ashmem-shim.c | 57 +++++++++++------------------------------- 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/mm/memfd-ashmem-shim.c b/mm/memfd-ashmem-shim.c index e09d95a8b274..258498cca9bb 100644 --- a/mm/memfd-ashmem-shim.c +++ b/mm/memfd-ashmem-shim.c @@ -16,58 +16,31 @@ #include "memfd-ashmem-shim.h" #include "memfd-ashmem-shim-internal.h" -/* file_path() returns the path of the file including the root, hence the additional "/". */ -#define MEMFD_PATH_PREFIX "/memfd:" -#define MEMFD_PATH_PREFIX_LEN (sizeof(MEMFD_PATH_PREFIX) - 1) +/* memfd file names all start with memfd: */ +#define MEMFD_PREFIX "memfd:" +#define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX) - 1) -/* All memfd files are unlinked, and are therefore suffixed with the " (deleted)" string. */ -#define UNLINKED_FILE_SUFFIX " (deleted)" -#define UNLINKED_FILE_SUFFIX_LEN (sizeof(UNLINKED_FILE_SUFFIX) - 1) - -/* - * 1 character for the start of the path (/), NAME_MAX for the maximum length of a full memfd file - * name, UNLINKED_FILE_SUFFIX_LEN for the " (deleted)" suffix, and 1 for the NUL terminating - * character. 
- */ -#define MAX_FILE_PATH_SIZE (1 + NAME_MAX + UNLINKED_FILE_SUFFIX_LEN + 1) - -static char *get_memfd_file_name(struct file *file, char *buf, size_t size) +static const char *get_memfd_name(struct file *file) { - char *name_end; - char *path = file_path(file, buf, size); + /* This pointer is always valid, so no need to check if it's NULL. */ + const char *file_name = file->f_path.dentry->d_name.name; - if (IS_ERR(path)) - return path; + if (file_name != strstr(file_name, MEMFD_PREFIX)) + return NULL; - /* Only handle memfds; we cannot make assumptions about other file names. */ - name_end = strstr(path, UNLINKED_FILE_SUFFIX); - if ((strstr(path, MEMFD_PATH_PREFIX) != path) || !name_end) - return ERR_PTR(-EINVAL); - - /* - * Since file_path() returns the full path of the file, including the root, the format will - * be: - * - * "/memfd:testbuf (deleted)" - * - * But the ASHMEM_GET_NAME ioctl only returns the name of the buffer without any prefixes - * or suffixes. So, terminate the string at the start of the " (deleted)" suffix so that - * strlen() can be used on it from the start of the name. - */ - *name_end = '\0'; - - /* return a pointer to the start of the name */ - return &path[MEMFD_PATH_PREFIX_LEN]; + return file_name; } static long get_name(struct file *file, void __user *name) { - char buf[MAX_FILE_PATH_SIZE]; - char *file_name = get_memfd_file_name(file, buf, sizeof(buf)); + const char *file_name = get_memfd_name(file); size_t len; - if (IS_ERR(file_name)) - return PTR_ERR(file_name); + if (!file_name) + return -EINVAL; + + /* Strip MEMFD_PREFIX to retain compatibility with ashmem driver. 
*/ + file_name = &file_name[MEMFD_PREFIX_LEN]; /* * The expectation is that the user provided buffer is ASHMEM_NAME_LEN in size, which is From ca2f65da73b1771ac53c892a8eb3ac95c374d4f1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:31 -0700 Subject: [PATCH 06/44] UPSTREAM: codel: remove sch->q.qlen check before qdisc_tree_reduce_backlog() [ Upstream commit 342debc12183b51773b3345ba267e9263bdfaaef ] After making all ->qlen_notify() callbacks idempotent, now it is safe to remove the check of qlen!=0 from both fq_codel_dequeue() and codel_qdisc_dequeue(). Bug: 410432097 Reported-by: Gerrard Tai Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211636.166257-1-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 4d55144b12e742404bb3f8fee6038bafbf45619d) Signed-off-by: Lee Jones Change-Id: I9508beb45995f003612895517ea391ef4beee7b4 --- net/sched/sch_codel.c | 5 +---- net/sched/sch_fq_codel.c | 6 ++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index d7a4874543de..5f2e06815745 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -95,10 +95,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) &q->stats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. 
- */ - if (q->stats.drop_count && sch->q.qlen) { + if (q->stats.drop_count) { qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; q->stats.drop_len = 0; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 8c4fee063436..9330923a624c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -314,10 +314,8 @@ begin: } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->cstats.drop_count && sch->q.qlen) { + + if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; From 48ab183a3ee2b787b0e0386200b350a58f204b3d Mon Sep 17 00:00:00 2001 From: Pierre Couillaud Date: Tue, 6 May 2025 14:05:27 -0700 Subject: [PATCH 07/44] ANDROID: GKI: Update symbol list for bcmstb INFO: 1 function symbol(s) added 'int __hwspin_trylock(struct hwspinlock*, int, unsigned long*)' Bug: 416077180 Change-Id: I7186f775675b14a2323eb62eabb7a05e485b7464 Signed-off-by: Pierre Couillaud --- android/abi_gki_aarch64.stg | 17 +++++++++++++++++ android/abi_gki_aarch64_bcmstb | 1 + 2 files changed, 18 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 1ec9533fbf5c..fdead9fe8009 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -335580,6 +335580,13 @@ function { parameter_id: 0x33b77109 parameter_id: 0x3283ded6 } +function { + id: 0x9eb38da8 + return_type_id: 0x6720d32f + parameter_id: 0x0ab9fa4c + parameter_id: 0x6720d32f + parameter_id: 0x064d6086 +} function { id: 0x9eb3dea3 return_type_id: 0x6720d32f @@ -344383,6 +344390,15 @@ elf_symbol { type_id: 0x9e215925 full_name: "__hwspin_lock_timeout" } +elf_symbol { + id: 0xac885058 + name: "__hwspin_trylock" + is_defined: true + symbol_type: FUNCTION + crc: 0xe4d4ab7e + type_id: 0x9eb38da8 + full_name: 
"__hwspin_trylock" +} elf_symbol { id: 0x09a111a4 name: "__hwspin_unlock" @@ -418736,6 +418752,7 @@ interface { symbol_id: 0x58de7795 symbol_id: 0x7874d435 symbol_id: 0xc1db2428 + symbol_id: 0xac885058 symbol_id: 0x09a111a4 symbol_id: 0x9ff710d8 symbol_id: 0xee9e2392 diff --git a/android/abi_gki_aarch64_bcmstb b/android/abi_gki_aarch64_bcmstb index e41670aeff26..446862098288 100644 --- a/android/abi_gki_aarch64_bcmstb +++ b/android/abi_gki_aarch64_bcmstb @@ -1604,6 +1604,7 @@ sdhci_set_clock sdhci_set_uhs_signaling sdhci_setup_host + __hwspin_trylock # required by slcan.ko hex_asc_upper From b29cc3971e2657571e64ba9fac9e95c64ac2c600 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Fri, 26 Apr 2024 10:35:12 +0530 Subject: [PATCH 08/44] UPSTREAM: usb: dwc3: core: Fix compile warning on s390 gcc in dwc3_get_phy call Recent commit introduced support for reading Multiport PHYs and while doing so iterated over an integer variable which runs from [0-254] in the worst case scenario. But S390 compiler treats it as a warning and complains that the integer write to string can go to 11 characters. Fix this by modifying iterator variable to u8. 
Bug: 254441685 Suggested-by: Johan Hovold Fixes: 30a46746ca5a ("usb: dwc3: core: Refactor PHY logic to support Multiport Controller") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404241215.Mib19Cu7-lkp@intel.com/ Signed-off-by: Krishna Kurapati Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20240426050512.57384-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3f12222a4bebeb13ce06ddecc1610ad32fa835dd) Signed-off-by: Lee Jones Change-Id: If11b5b866842a9e94edfdfcaa7c4aea1f575e0d2 --- drivers/usb/dwc3/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index b757fe1e5d96..1ec8522fc93d 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1480,7 +1480,7 @@ static int dwc3_core_get_phy(struct dwc3 *dwc) struct phy *temp_phy = NULL; char phy_name[9]; int ret; - int i; + u8 i; if (node) { dwc->usb2_phy = devm_usb_get_phy_by_phandle(dev, "usb-phy", 0); @@ -1510,7 +1510,7 @@ static int dwc3_core_get_phy(struct dwc3 *dwc) if (vdwc->num_usb2_ports == 1) snprintf(phy_name, sizeof(phy_name), "usb2-phy"); else - snprintf(phy_name, sizeof(phy_name), "usb2-%d", i); + snprintf(phy_name, sizeof(phy_name), "usb2-%u", i); temp_phy = devm_phy_get(dev, phy_name); if (IS_ERR(temp_phy)) { @@ -1532,7 +1532,7 @@ static int dwc3_core_get_phy(struct dwc3 *dwc) if (vdwc->num_usb3_ports == 1) snprintf(phy_name, sizeof(phy_name), "usb3-phy"); else - snprintf(phy_name, sizeof(phy_name), "usb3-%d", i); + snprintf(phy_name, sizeof(phy_name), "usb3-%u", i); temp_phy = devm_phy_get(dev, phy_name); if (IS_ERR(temp_phy)) { From b45e2c927411cd2e48dd5eae01165f2fe01ee27e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:23 +0200 Subject: [PATCH 09/44] UPSTREAM: PM: domains: Fix alloc/free in dev_pm_domain_attach|detach_list() The dev_pm_domain_attach|detach_list() functions are not resource managed, hence 
they should not use devm_* helpers to manage allocation/freeing of data. Let's fix this by converting to the traditional alloc/free functions. Bug: 254441685 Fixes: 161e16a5e50a ("PM: domains: Add helper functions to attach/detach multiple PM domains") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-3-ulf.hansson@linaro.org (cherry picked from commit 7738568885f2eaecfc10a3f530a2693e5f0ae3d0) Signed-off-by: Lee Jones Change-Id: If7138b246fcd6811001ba7b22c118b2e5132c463 --- drivers/base/power/common.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 299496e1381d..d853c75dda43 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -195,6 +195,7 @@ int dev_pm_domain_attach_list(struct device *dev, struct device *pd_dev = NULL; int ret, i, num_pds = 0; bool by_id = true; + size_t size; u32 pd_flags = data ? data->pd_flags : 0; u32 link_flags = pd_flags & PD_FLAG_NO_DEV_LINK ? 
0 : DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME; @@ -217,19 +218,17 @@ int dev_pm_domain_attach_list(struct device *dev, if (num_pds <= 0) return 0; - pds = devm_kzalloc(dev, sizeof(*pds), GFP_KERNEL); + pds = kzalloc(sizeof(*pds), GFP_KERNEL); if (!pds) return -ENOMEM; - pds->pd_devs = devm_kcalloc(dev, num_pds, sizeof(*pds->pd_devs), - GFP_KERNEL); - if (!pds->pd_devs) - return -ENOMEM; - - pds->pd_links = devm_kcalloc(dev, num_pds, sizeof(*pds->pd_links), - GFP_KERNEL); - if (!pds->pd_links) - return -ENOMEM; + size = sizeof(*pds->pd_devs) + sizeof(*pds->pd_links); + pds->pd_devs = kcalloc(num_pds, size, GFP_KERNEL); + if (!pds->pd_devs) { + ret = -ENOMEM; + goto free_pds; + } + pds->pd_links = (void *)(pds->pd_devs + num_pds); if (link_flags && pd_flags & PD_FLAG_DEV_LINK_ON) link_flags |= DL_FLAG_RPM_ACTIVE; @@ -272,6 +271,9 @@ err_attach: device_link_del(pds->pd_links[i]); dev_pm_domain_detach(pds->pd_devs[i], true); } + kfree(pds->pd_devs); +free_pds: + kfree(pds); return ret; } EXPORT_SYMBOL_GPL(dev_pm_domain_attach_list); @@ -318,6 +320,9 @@ void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) device_link_del(list->pd_links[i]); dev_pm_domain_detach(list->pd_devs[i], true); } + + kfree(list->pd_devs); + kfree(list); } EXPORT_SYMBOL_GPL(dev_pm_domain_detach_list); From 7192539e3e36af08ac9d5159c3cb81bfe7f3af8b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2024 16:51:03 +0200 Subject: [PATCH 10/44] UPSTREAM: serial: qcom-geni: revert broken hibernation support This reverts commit 35781d8356a2eecaa6074ceeb80ee22e252fcdae. Hibernation is not supported on Qualcomm platforms with mainline kernels yet a broken vendor implementation for the GENI serial driver made it upstream. This is effectively dead code that cannot be tested and should just be removed, but if these paths were ever hit for an open non-console port they would crash the machine as the driver would fail to enable clocks during restore() (i.e. 
all ports would have to be closed by drivers and user space before hibernating the system to avoid this as a comment in the code hinted at). The broken implementation also added a random call to enable the receiver in the port setup code where it does not belong and which enables the receiver prematurely for console ports. Bug: 254441685 Fixes: 35781d8356a2 ("tty: serial: qcom-geni-serial: Add support for Hibernation feature") Cc: stable@vger.kernel.org # 6.2 Cc: Aniket Randive Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20241009145110.16847-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 19df76662a33d2f2fc41a66607cb8285fc02d6ec) Signed-off-by: Lee Jones Change-Id: I2ee5832b26e10ff03699e74a8f72d1c0393c9e22 --- drivers/tty/serial/qcom_geni_serial.c | 41 ++------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index ae82136f0aeb..76ceff99ef5e 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1118,7 +1118,6 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) false, true, true); geni_se_init(&port->se, UART_RX_WM, port->rx_fifo_depth - 2); geni_se_select_mode(&port->se, port->dev_data->mode); - qcom_geni_serial_start_rx(uport); port->setup = true; return 0; @@ -1734,38 +1733,6 @@ static int qcom_geni_serial_sys_resume(struct device *dev) return ret; } -static int qcom_geni_serial_sys_hib_resume(struct device *dev) -{ - int ret = 0; - struct uart_port *uport; - struct qcom_geni_private_data *private_data; - struct qcom_geni_serial_port *port = dev_get_drvdata(dev); - - uport = &port->uport; - private_data = uport->private_data; - - if (uart_console(uport)) { - geni_icc_set_tag(&port->se, QCOM_ICC_TAG_ALWAYS); - geni_icc_set_bw(&port->se); - ret = uart_resume_port(private_data->drv, uport); - /* - * For hibernation usecase clients for - * 
console UART won't call port setup during restore, - * hence call port setup for console uart. - */ - qcom_geni_serial_port_setup(uport); - } else { - /* - * Peripheral register settings are lost during hibernation. - * Update setup flag such that port setup happens again - * during next session. Clients of HS-UART will close and - * open the port during hibernation. - */ - port->setup = false; - } - return ret; -} - static const struct qcom_geni_device_data qcom_geni_console_data = { .console = true, .mode = GENI_SE_FIFO, @@ -1777,12 +1744,8 @@ static const struct qcom_geni_device_data qcom_geni_uart_data = { }; static const struct dev_pm_ops qcom_geni_serial_pm_ops = { - .suspend = pm_sleep_ptr(qcom_geni_serial_sys_suspend), - .resume = pm_sleep_ptr(qcom_geni_serial_sys_resume), - .freeze = pm_sleep_ptr(qcom_geni_serial_sys_suspend), - .poweroff = pm_sleep_ptr(qcom_geni_serial_sys_suspend), - .restore = pm_sleep_ptr(qcom_geni_serial_sys_hib_resume), - .thaw = pm_sleep_ptr(qcom_geni_serial_sys_hib_resume), + SYSTEM_SLEEP_PM_OPS(qcom_geni_serial_sys_suspend, + qcom_geni_serial_sys_resume) }; static const struct of_device_id qcom_geni_serial_match_table[] = { From a368123b90e52869829cd2fffec370c5224bf5d0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2024 16:51:05 +0200 Subject: [PATCH 11/44] UPSTREAM: serial: qcom-geni: fix dma rx cancellation Make sure to wait for the DMA transfer to complete when cancelling the rx command on stop_rx(). This specifically prevents the DMA completion interrupt from firing after rx has been restarted, something which can lead to an IOMMU fault and hosed rx when the interrupt handler unmaps the DMA buffer for the new command: qcom_geni_serial 988000.serial: serial engine reports 0 RX bytes in! 
arm-smmu 15000000.iommu: FSR = 00000402 [Format=2 TF], SID=0x563 arm-smmu 15000000.iommu: FSYNR0 = 00210013 [S1CBNDX=33 WNR PLVL=3] Bluetooth: hci0: command 0xfc00 tx timeout Bluetooth: hci0: Reading QCA version information failed (-110) Also add the missing state machine reset which is needed in case cancellation fails. Bug: 254441685 Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA") Cc: stable@vger.kernel.org # 6.3 Cc: Bartosz Golaszewski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20241009145110.16847-5-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 23ee4a25661c33e6381d41e848a9060ed6d72845) Signed-off-by: Lee Jones Change-Id: Ie7e9dd51669db7f90057c2535ee8b51814ea7e93 --- drivers/tty/serial/qcom_geni_serial.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 76ceff99ef5e..31e938effbb7 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -764,17 +764,27 @@ static void qcom_geni_serial_start_rx_fifo(struct uart_port *uport) static void qcom_geni_serial_stop_rx_dma(struct uart_port *uport) { struct qcom_geni_serial_port *port = to_dev_port(uport); + bool done; if (!qcom_geni_serial_secondary_active(uport)) return; geni_se_cancel_s_cmd(&port->se); - qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS, - S_CMD_CANCEL_EN, true); - - if (qcom_geni_serial_secondary_active(uport)) + done = qcom_geni_serial_poll_bit(uport, SE_DMA_RX_IRQ_STAT, + RX_EOT, true); + if (done) { + writel(RX_EOT | RX_DMA_DONE, + uport->membase + SE_DMA_RX_IRQ_CLR); + } else { qcom_geni_serial_abort_rx(uport); + writel(1, uport->membase + SE_DMA_RX_FSM_RST); + qcom_geni_serial_poll_bit(uport, SE_DMA_RX_IRQ_STAT, + RX_RESET_DONE, true); + writel(RX_RESET_DONE | RX_DMA_DONE, + uport->membase + SE_DMA_RX_IRQ_CLR); + } + if (port->rx_dma_addr) { 
geni_se_rx_dma_unprep(&port->se, port->rx_dma_addr, DMA_RX_BUF_SIZE); From 98cb57aeb332147a0b77c392d806b4c3b16ca1c0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2024 16:51:06 +0200 Subject: [PATCH 12/44] UPSTREAM: serial: qcom-geni: fix receiver enable The receiver is supposed to be enabled in the startup() callback and not in set_termios() which is called also during console setup. This specifically avoids accepting input before the port has been opened (and interrupts enabled), something which can also break the GENI firmware (cancel fails and after abort, the "stale" counter handling appears to be broken so that later input is not processed until twelve chars have been received). There also does not appear to be any need to keep the receiver disabled while updating the port settings. Since commit 6f3c3cafb115 ("serial: qcom-geni: disable interrupts during console writes") the calls to manipulate the secondary interrupts, which were done without holding the port lock, can also lead to the receiver being left disabled when set_termios() races with the console code (e.g. when init opens the tty during boot). This can manifest itself as a serial getty not accepting input. The calls to stop and start rx in set_termios() can similarly race with DMA completion and, for example, cause the DMA buffer to be unmapped twice or the mapping to be leaked. Fix this by only enabling the receiver during startup and while holding the port lock to avoid racing with the console code. 
Bug: 254441685 Fixes: 6f3c3cafb115 ("serial: qcom-geni: disable interrupts during console writes") Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA") Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP") Cc: stable@vger.kernel.org # 6.3 Cc: Bartosz Golaszewski Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20241009145110.16847-6-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fa103d2599e11e802c818684cff821baefe7f206) Signed-off-by: Lee Jones Change-Id: Ie5771faa0adbf570c9f726031cb973d013e04cca --- drivers/tty/serial/qcom_geni_serial.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 31e938effbb7..dda017e40cb8 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1143,6 +1143,11 @@ static int qcom_geni_serial_startup(struct uart_port *uport) if (ret) return ret; } + + uart_port_lock_irq(uport); + qcom_geni_serial_start_rx(uport); + uart_port_unlock_irq(uport); + enable_irq(uport->irq); return 0; @@ -1227,7 +1232,6 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, u32 ver, sampling_rate; unsigned int avg_bw_core; - qcom_geni_serial_stop_rx(uport); /* baud rate */ baud = uart_get_baud_rate(uport, termios, old, 300, 4000000); port->baud = baud; @@ -1244,7 +1248,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, dev_err(port->se.dev, "Couldn't find suitable clock rate for %u\n", baud * sampling_rate); - goto out_restart_rx; + return; } dev_dbg(port->se.dev, "desired_rate = %u, clk_rate = %lu, clk_div = %u\n", @@ -1323,8 +1327,6 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, writel(stop_bit_len, uport->membase + SE_UART_TX_STOP_BIT_LEN); writel(ser_clk_cfg, uport->membase + GENI_SER_M_CLK_CFG); 
writel(ser_clk_cfg, uport->membase + GENI_SER_S_CLK_CFG); -out_restart_rx: - qcom_geni_serial_start_rx(uport); } #ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE From 34f1eb99850e7df61a91c58d2afd2a18a5cf91ad Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Mon, 14 Oct 2024 22:12:11 +0000 Subject: [PATCH 13/44] UPSTREAM: mm/mglru: only clear kswapd_failures if reclaimable lru_gen_shrink_node() unconditionally clears kswapd_failures, which can prevent kswapd from sleeping and cause 100% kswapd cpu usage even when kswapd repeatedly fails to make progress in reclaim. Only clear kswapd_failures in lru_gen_shrink_node() if reclaim makes some progress, similar to shrink_node(). I happened to run into this problem in one of my tests recently. It requires a combination of several conditions: The allocator needs to allocate a right amount of pages such that it can wake up kswapd without itself being OOM killed; there is no memory for kswapd to reclaim (My test disables swap and cleans page cache first); no other process frees enough memory at the same time. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20241014221211.832591-1-weixugc@google.com Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Wei Xu Cc: Axel Rasmussen Cc: Brian Geffon Cc: Jan Alexander Steffens Cc: Suleiman Souhlal Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton (cherry picked from commit b130ba4a6259f6b64d8af15e9e7ab1e912bcb7ad) Signed-off-by: Lee Jones Change-Id: Ia2b4a0d71096d1e6cd0ee6054df3544724d4b665 --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index c14a16044515..08e98c9f0a90 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5654,8 +5654,8 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control * blk_finish_plug(&plug); done: - /* kswapd should never fail */ - pgdat->kswapd_failures = 0; + if (sc->nr_reclaimed > reclaimed) + pgdat->kswapd_failures = 0; } /****************************************************************************** From 7c2011337f5bec67497f0bb527b9eceb9acab19d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 17 Oct 2024 00:19:47 +0000 Subject: [PATCH 14/44] UPSTREAM: KVM: arm64: Ensure vgic_ready() is ordered against MMIO registration kvm_vgic_map_resources() prematurely marks the distributor as 'ready', potentially allowing vCPUs to enter the guest before the distributor's MMIO registration has been made visible. Plug the race by marking the distributor as ready only after MMIO registration is completed. Rely on the implied ordering of synchronize_srcu() to ensure the MMIO registration is visible before vgic_dist::ready. This also means that writers to vgic_dist::ready are now serialized by the slots_lock, which was effectively the case already as all writers held the slots_lock in addition to the config_lock. 
Bug: 254441685 Fixes: 59112e9c390b ("KVM: arm64: vgic: Fix a circular locking issue") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20241017001947.2707312-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier (cherry picked from commit 78a00555550042ed77b33ace7423aced228b3b4e) Signed-off-by: Lee Jones Change-Id: I01a7bdc92bbfe8642829c0c8f5e1bb55e1aea18f --- arch/arm64/kvm/vgic/vgic-init.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 059d00c17d26..0c976b3dd2b4 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -482,14 +482,23 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (ret) goto out; - dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) + if (ret) { kvm_err("Unable to register VGIC dist MMIO regions\n"); + goto out_slots; + } + /* + * kvm_io_bus_register_dev() guarantees all readers see the new MMIO + * registration before returning through synchronize_srcu(), which also + * implies a full memory barrier. As such, marking the distributor as + * 'ready' here is guaranteed to be ordered after all vCPUs having seen + * a completely configured distributor. + */ + dist->ready = true; goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); From c5abfe08fc657a2511592ee7c6a684b848af2def Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 15 Oct 2024 18:56:05 +0100 Subject: [PATCH 15/44] UPSTREAM: fork: do not invoke uffd on fork if error occurs Patch series "fork: do not expose incomplete mm on fork". During fork we may place the virtual memory address space into an inconsistent state before the fork operation is complete. In addition, we may encounter an error during the fork operation that indicates that the virtual memory address space is invalidated. 
As a result, we should not be exposing it in any way to external machinery that might interact with the mm or VMAs, machinery that is not designed to deal with incomplete state. We specifically update the fork logic to defer khugepaged and ksm to the end of the operation and only to be invoked if no error arose, and disallow uffd from observing fork events should an error have occurred. This patch (of 2): Currently on fork we expose the virtual address space of a process to userland unconditionally if uffd is registered in VMAs, regardless of whether an error arose in the fork. This is performed in dup_userfaultfd_complete() which is invoked unconditionally, and performs two duties - invoking registered handlers for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down userfaultfd_fork_ctx objects established in dup_userfaultfd(). This is problematic, because the virtual address space may not yet be correctly initialised if an error arose. The change in commit d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") makes this more pertinent as we may be in a state where entries in the maple tree are not yet consistent. We address this by, on fork error, ensuring that we roll back state that we would otherwise expect to clean up through the event being handled by userland and perform the memory freeing duty otherwise performed by dup_userfaultfd_complete(). We do this by implementing a new function, dup_userfaultfd_fail(), which performs the same loop, only decrementing reference counts. Note that we perform mmgrab() on the parent and child mm's, however userfaultfd_ctx_put() will mmdrop() this once the reference count drops to zero, so we will avoid memory leaks correctly here. 
Bug: 254441685 Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Reviewed-by: Jann Horn Reviewed-by: Liam R. Howlett Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Linus Torvalds Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton (cherry picked from commit f64e67e5d3a45a4a04286c47afade4b518acd47b) Signed-off-by: Lee Jones Change-Id: I9c2f774a0f4a0a75729b86c77c627fb38b8bb17b --- fs/userfaultfd.c | 28 ++++++++++++++++++++++++++++ include/linux/userfaultfd_k.h | 5 +++++ kernel/fork.c | 5 ++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f36e6e018b26..e8baf3e8a86a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -692,6 +692,34 @@ void dup_userfaultfd_complete(struct list_head *fcs) } } +void dup_userfaultfd_fail(struct list_head *fcs) +{ + struct userfaultfd_fork_ctx *fctx, *n; + + /* + * An error has occurred on fork, we will tear memory down, but have + * allocated memory for fctx's and raised reference counts for both the + * original and child contexts (and on the mm for each as a result). + * + * These would ordinarily be taken care of by a user handling the event, + * but we are no longer doing so, so manually clean up here. + * + * mm tear down will take care of cleaning up VMA contexts. 
+ */ + list_for_each_entry_safe(fctx, n, fcs, list) { + struct userfaultfd_ctx *octx = fctx->orig; + struct userfaultfd_ctx *ctx = fctx->new; + + atomic_dec(&octx->mmap_changing); + VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0); + userfaultfd_ctx_put(octx); + userfaultfd_ctx_put(ctx); + + list_del(&fctx->list); + kfree(fctx); + } +} + void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct vm_userfaultfd_ctx *vm_ctx) { diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 7d881c5df6ad..587f718ccc90 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -225,6 +225,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); extern void dup_userfaultfd_complete(struct list_head *); +void dup_userfaultfd_fail(struct list_head *); extern void mremap_userfaultfd_prep(struct vm_area_struct *, struct vm_userfaultfd_ctx *); @@ -299,6 +300,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l) { } +static inline void dup_userfaultfd_fail(struct list_head *l) +{ +} + static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct vm_userfaultfd_ctx *ctx) { diff --git a/kernel/fork.c b/kernel/fork.c index 2258ff82172e..82cd80f04f9d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -806,7 +806,10 @@ out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); mmap_write_unlock(oldmm); - dup_userfaultfd_complete(&uf); + if (!retval) + dup_userfaultfd_complete(&uf); + else + dup_userfaultfd_fail(&uf); fail_uprobe_end: uprobe_end_dup_mmap(); return retval; From 1cc1e931722aa31c670505d112197cbca3d02831 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 10 Dec 2024 17:24:12 +0000 Subject: [PATCH 16/44] UPSTREAM: fork: avoid inappropriate uprobe access to invalid mm If dup_mmap() encounters an issue, currently uprobe is able to access the relevant mm via the reverse mapping (in build_map_info()), and if we are 
very unlucky with a race window, observe invalid XA_ZERO_ENTRY state which we establish as part of the fork error path. This occurs because uprobe_write_opcode() invokes anon_vma_prepare() which in turn invokes find_mergeable_anon_vma() that uses a VMA iterator, invoking vma_iter_load() which uses the advanced maple tree API and thus is able to observe XA_ZERO_ENTRY entries added to dup_mmap() in commit d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()"). This change was made on the assumption that only process tear-down code would actually observe (and make use of) these values. However this very unlikely but still possible edge case with uprobes exists and unfortunately does make these observable. The uprobe operation prevents races against the dup_mmap() operation via the dup_mmap_sem semaphore, which is acquired via uprobe_start_dup_mmap() and dropped via uprobe_end_dup_mmap(), and held across register_for_each_vma() prior to invoking build_map_info() which does the reverse mapping lookup. Currently these are acquired and dropped within dup_mmap(), which exposes the race window prior to error handling in the invoking dup_mm() which tears down the mm. We can avoid all this by just moving the invocation of uprobe_start_dup_mmap() and uprobe_end_dup_mmap() up a level to dup_mm() and only release this lock once the dup_mmap() operation succeeds or clean up is done. This means that the uprobe code can never observe an incompletely constructed mm and resolves the issue in this case. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20241210172412.52995-1-lorenzo.stoakes@oracle.com Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") Signed-off-by: Lorenzo Stoakes Reported-by: syzbot+2d788f4f7cb660dac4b7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6756d273.050a0220.2477f.003d.GAE@google.com/ Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jann Horn Cc: Jiri Olsa Cc: Kan Liang Cc: Liam R. Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Peng Zhang Cc: Peter Zijlstra Cc: Vlastimil Babka Cc: David Hildenbrand Signed-off-by: Andrew Morton (cherry picked from commit 8ac662f5da19f5873fdd94c48a5cdb45b2e1b58f) Signed-off-by: Lee Jones Change-Id: I915ed6b4f49d63d0d629dd8e9247d4684c664f3a --- kernel/fork.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 82cd80f04f9d..91357988f282 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -662,11 +662,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, LIST_HEAD(uf); MA_STATE(mas, &mm->mm_mt, 0, 0); - uprobe_start_dup_mmap(); - if (mmap_write_lock_killable(oldmm)) { - retval = -EINTR; - goto fail_uprobe_end; - } + if (mmap_write_lock_killable(oldmm)) + return -EINTR; flush_cache_dup_mm(oldmm); uprobe_dup_mmap(oldmm, mm); /* @@ -810,8 +807,6 @@ out: dup_userfaultfd_complete(&uf); else dup_userfaultfd_fail(&uf); -fail_uprobe_end: - uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: @@ -1642,9 +1637,11 @@ static struct mm_struct *dup_mm(struct task_struct *tsk, if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; + uprobe_start_dup_mmap(); err = dup_mmap(mm, oldmm); if (err) goto free_pt; + uprobe_end_dup_mmap(); mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; @@ -1659,6 +1656,8 @@ free_pt: mm->binfmt = NULL; mm_init_owner(mm, NULL); mmput(mm); + if (err) + 
uprobe_end_dup_mmap(); fail_nomem: return NULL; From 646380b087a5203408877933afdaa2b480c2bc57 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Sat, 18 Jan 2025 11:31:33 +0530 Subject: [PATCH 17/44] UPSTREAM: usb: gadget: f_midi: Fixing wMaxPacketSize exceeded issue during MIDI bind retries The current implementation sets the wMaxPacketSize of bulk in/out endpoints to 1024 bytes at the end of the f_midi_bind function. However, in cases where there is a failure in the first midi bind attempt, consider rebinding. This scenario may encounter an f_midi_bind issue due to the previous bind setting the bulk endpoint's wMaxPacketSize to 1024 bytes, which exceeds the ep->maxpacket_limit where configured dwc3 TX/RX FIFO's maxpacket size of 512 bytes for IN/OUT endpoints in support HS speed only. Here the term "rebind" in this context refers to attempting to bind the MIDI function a second time in certain scenarios. The situations where rebinding is considered include: * When there is a failure in the first UDC write attempt, which may be caused by other functions bind along with MIDI. * Runtime composition change : Example : MIDI,ADB to MIDI. Or MIDI to MIDI,ADB. This commit addresses this issue by resetting the wMaxPacketSize before endpoint claim. And here there is no need to reset all values in the usb endpoint descriptor structure, as all members except wMaxPacketSize and bEndpointAddress have predefined values. This restores the endpoint to its expected configuration and prevents conflicts with the value of ep->maxpacket_limit. It also aligns with the approach used in other function drivers, which treat endpoint descriptors as if they were full speed before endpoint claim. 
Bug: 254441685 Fixes: 46decc82ffd5 ("usb: gadget: unconditionally allocate hs/ss descriptor in bind operation") Cc: stable@vger.kernel.org Signed-off-by: Selvarasu Ganesan Link: https://lore.kernel.org/r/20250118060134.927-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9e8b21410f310c50733f6e1730bae5a8e30d3570) Signed-off-by: Lee Jones Change-Id: I300e3f5aa42555faf1e3c97b716396a6f8c77770 --- drivers/usb/gadget/function/f_midi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 9c8d56a496c9..5f022db4fa71 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -906,6 +906,15 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f) status = -ENODEV; + /* + * Reset wMaxPacketSize with maximum packet size of FS bulk transfer before + * endpoint claim. This ensures that the wMaxPacketSize does not exceed the + * limit during bind retries where configured dwc3 TX/RX FIFO's maxpacket + * size of 512 bytes for IN/OUT endpoints in support HS speed only. + */ + bulk_in_desc.wMaxPacketSize = cpu_to_le16(64); + bulk_out_desc.wMaxPacketSize = cpu_to_le16(64); + /* allocate instance-specific endpoints */ midi->in_ep = usb_ep_autoconfig(cdev->gadget, &bulk_in_desc); if (!midi->in_ep) From 53b26534cce792bd8a1040c0dcddd31a34194ab0 Mon Sep 17 00:00:00 2001 From: Norihiko Hama Date: Wed, 15 May 2024 09:43:39 +0900 Subject: [PATCH 18/44] UPSTREAM: usb-storage: Optimize scan delay more precisely Current storage scan delay is reduced by the following old commit. a4a47bc03fe5 ("Lower USB storage settling delay to something more reasonable") It means that delay is at least 'one second', or zero with delay_use=0. 'one second' is still long delay especially for embedded system but when delay_use is set to 0 (no delay), still error observed on some USB drives. 
So delay_use should not be set to 0 but 'one second' is quite long. Especially for embedded system, it's important for end user how quickly access to USB drive when it's connected. That's why we have a chance to minimize such a constant long delay. This patch optimizes scan delay more precisely to minimize delay time but not to have any problems on USB drives by extending module parameter 'delay_use' in milliseconds internally. The parameter 'delay_use' optionally supports in milliseconds if it ends with 'ms'. It makes the range of value to 1 / 1000 in internal 32-bit value but it's still enough to set the delay time. By default, delay time is 'one second' for backward compatibility. For example, it seems to be good by changing delay_use=100ms, that is 100 millisecond delay without issues for most USB pen drives. Bug: 408977963 Change-Id: I77521bc01a7dadaa5bb94aecd361f2507892928c (cherry picked from commit 804da867ad016d53bf33373cfeaae041775455f1) Signed-off-by: Norihiko Hama Link: https://lore.kernel.org/r/20240515004339.29892-1-Norihiko.Hama@alpsalpine.com Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 3 + drivers/usb/storage/usb.c | 101 +++++++++++++++++- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e969f2f14cd1..6593773a512d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6690,6 +6690,9 @@ usb-storage.delay_use= [UMS] The delay in seconds before a new device is scanned for Logical Units (default 1). + Optionally the delay in milliseconds if the value has + suffix with "ms". 
+ Example: delay_use=2567ms usb-storage.quirks= [UMS] A list of quirks entries to supplement or diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index ed7c6ad96a74..dc4e3983f7c5 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -67,9 +67,102 @@ MODULE_AUTHOR("Matthew Dharm "); MODULE_DESCRIPTION("USB Mass Storage driver for Linux"); MODULE_LICENSE("GPL"); -static unsigned int delay_use = 1; -module_param(delay_use, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device"); +static unsigned int delay_use = 1 * MSEC_PER_SEC; + +/** + * parse_delay_str - parse an unsigned decimal integer delay + * @str: String to parse. + * @ndecimals: Number of decimal to scale up. + * @suffix: Suffix string to parse. + * @val: Where to store the parsed value. + * + * Parse an unsigned decimal value in @str, optionally end with @suffix. + * Stores the parsed value in @val just as it is if @str ends with @suffix. + * Otherwise store the value scale up by 10^(@ndecimal). + * + * Returns 0 on success, a negative error code otherwise. + */ +static int parse_delay_str(const char *str, int ndecimals, const char *suffix, + unsigned int *val) +{ + int n, n2, l; + char buf[16]; + + l = strlen(suffix); + n = strlen(str); + if (n > 0 && str[n - 1] == '\n') + --n; + if (n >= l && !strncmp(&str[n - l], suffix, l)) { + n -= l; + n2 = 0; + } else + n2 = ndecimals; + + if (n + n2 > sizeof(buf) - 1) + return -EINVAL; + + memcpy(buf, str, n); + while (n2-- > 0) + buf[n++] = '0'; + buf[n] = 0; + + return kstrtouint(buf, 10, val); +} + +/** + * format_delay_ms - format an integer value into a delay string + * @val: The integer value to format, scaled by 10^(@ndecimals). + * @ndecimals: Number of decimal to scale down. + * @suffix: Suffix string to format. + * @str: Where to store the formatted string. + * @size: The size of buffer for @str. 
+ * + * Format an integer value in @val scale down by 10^(@ndecimals) without @suffix + * if @val is divisible by 10^(@ndecimals). + * Otherwise format a value in @val just as it is with @suffix + * + * Returns the number of characters written into @str. + */ +static int format_delay_ms(unsigned int val, int ndecimals, const char *suffix, + char *str, int size) +{ + u64 delay_ms = val; + unsigned int rem = do_div(delay_ms, int_pow(10, ndecimals)); + int ret; + + if (rem) + ret = scnprintf(str, size, "%u%s\n", val, suffix); + else + ret = scnprintf(str, size, "%u\n", (unsigned int)delay_ms); + return ret; +} + +static int delay_use_set(const char *s, const struct kernel_param *kp) +{ + unsigned int delay_ms; + int ret; + + ret = parse_delay_str(skip_spaces(s), 3, "ms", &delay_ms); + if (ret < 0) + return ret; + + *((unsigned int *)kp->arg) = delay_ms; + return 0; +} + +static int delay_use_get(char *s, const struct kernel_param *kp) +{ + unsigned int delay_ms = *((unsigned int *)kp->arg); + + return format_delay_ms(delay_ms, 3, "ms", s, PAGE_SIZE); +} + +static const struct kernel_param_ops delay_use_ops = { + .set = delay_use_set, + .get = delay_use_get, +}; +module_param_cb(delay_use, &delay_use_ops, &delay_use, 0644); +MODULE_PARM_DESC(delay_use, "time to delay before using a new device"); static char quirks[128]; module_param_string(quirks, quirks, sizeof(quirks), S_IRUGO | S_IWUSR); @@ -1066,7 +1159,7 @@ int usb_stor_probe2(struct us_data *us) if (delay_use > 0) dev_dbg(dev, "waiting for device to settle before scanning\n"); queue_delayed_work(system_freezable_wq, &us->scan_dwork, - delay_use * HZ); + msecs_to_jiffies(delay_use)); return 0; /* We come here if there are any problems */ From 3cd01bb5bd3786138b0613b342c8e7ea32d92851 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 16 Apr 2025 14:24:39 -0600 Subject: [PATCH 19/44] UPSTREAM: mm: Fix is_zero_page() usage in try_grab_page() The backport of upstream commit c8070b787519 ("mm: Don't pin 
ZERO_PAGE in pin_user_pages()") into v6.1.130 noted below in Fixes does not account for commit 0f0892356fa1 ("mm: allow multiple error returns in try_grab_page()"), which changed the return value of try_grab_page() from bool to int. Therefore returning 0, success in the upstream version, becomes an error here. Fix the return value. Bug: 411256892 Fixes: 476c1dfefab8 ("mm: Don't pin ZERO_PAGE in pin_user_pages()") Link: https://lore.kernel.org/all/Z_6uhLQjJ7SSzI13@eldamar.lan Reported-by: Salvatore Bonaccorso Reported-by: Milan Broz Reviewed-by: David Hildenbrand Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Sasha Levin Change-Id: I1a124e70161e48c9b3374aa3fc541c6a13ea1ff0 Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman (cherry picked from commit edde34b792edb58a65cf16971cf34b5619c0959a) Signed-off-by: Will McVicker --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index b1daaa9d89aa..76a2b0943e2d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -232,7 +232,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) * and it is used in a *lot* of places. */ if (is_zero_page(page)) - return 0; + return true; /* * Similar to try_grab_folio(): be sure to *also* From 218e2bd245875f4ad97f836cabf5970c044eee0f Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 8 May 2025 16:26:42 -0700 Subject: [PATCH 20/44] FROMGIT: perf/aux: Allocate non-contiguous AUX pages by default perf always allocates contiguous AUX pages based on aux_watermark. However, this contiguous allocation doesn't benefit all PMUs. For instance, ARM SPE and TRBE operate with virtual pages, and Coresight ETR allocates a separate buffer. For these PMUs, allocating contiguous AUX pages unnecessarily exacerbates memory fragmentation. This fragmentation can prevent their use on long-running devices. 
This patch modifies the perf driver to be memory-friendly by default, by allocating non-contiguous AUX pages. For PMUs requiring contiguous pages (Intel BTS and some Intel PT), the existing PERF_PMU_CAP_AUX_NO_SG capability can be used. For PMUs that don't require but can benefit from contiguous pages (some Intel PT), a new capability, PERF_PMU_CAP_AUX_PREFER_LARGE, is added to maintain their existing behavior. Bug: 393467632 (cherry picked from commit 18049c8cff9cc89daadc4df6975f7d9069638926 git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core) Change-Id: Iaff554201726bf271c7625a6df59fb35c6cfbc5d Signed-off-by: Yabin Cui Signed-off-by: Ingo Molnar Reviewed-by: James Clark Reviewed-by: Anshuman Khandual Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250508232642.148767-1-yabinc@google.com --- arch/x86/events/intel/pt.c | 2 ++ include/linux/perf_event.h | 1 + kernel/events/ring_buffer.c | 29 ++++++++++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 7ee8dc80a359..5ce05f559c64 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1793,6 +1793,8 @@ static __init int pt_init(void) if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; + else + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE; pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; pt_pmu.pmu.attr_groups = pt_attr_groups; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 92d866352f35..6ef9152c8348 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -286,6 +286,7 @@ struct perf_event; #define PERF_PMU_CAP_NO_EXCLUDE 0x0080 #define PERF_PMU_CAP_AUX_OUTPUT 0x0100 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200 +#define 
PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 struct perf_output_handle; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 644dfed04926..b9c010a0e0fe 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -672,15 +672,23 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, { bool overwrite = !(flags & RING_BUFFER_WRITABLE); int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu); - int ret = -ENOMEM, max_order; + bool use_contiguous_pages = event->pmu->capabilities & ( + PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_PREFER_LARGE); + /* + * Initialize max_order to 0 for page allocation. This allocates single + * pages to minimize memory fragmentation. This is overridden if the + * PMU needs or prefers contiguous pages (use_contiguous_pages = true). + */ + int max_order = 0; + int ret = -ENOMEM; if (!has_aux(event)) return -EOPNOTSUPP; if (!overwrite) { /* - * Watermark defaults to half the buffer, and so does the - * max_order, to aid PMU drivers in double buffering. + * Watermark defaults to half the buffer, to aid PMU drivers + * in double buffering. */ if (!watermark) watermark = min_t(unsigned long, @@ -688,16 +696,19 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, (unsigned long)nr_pages << (PAGE_SHIFT - 1)); /* - * Use aux_watermark as the basis for chunking to - * help PMU drivers honor the watermark. + * If using contiguous pages, use aux_watermark as the basis + * for chunking to help PMU drivers honor the watermark. */ - max_order = get_order(watermark); + if (use_contiguous_pages) + max_order = get_order(watermark); } else { /* - * We need to start with the max_order that fits in nr_pages, - * not the other way around, hence ilog2() and not get_order. + * If using contiguous pages, we need to start with the + * max_order that fits in nr_pages, not the other way around, + * hence ilog2() and not get_order. 
*/ - max_order = ilog2(nr_pages); + if (use_contiguous_pages) + max_order = ilog2(nr_pages); watermark = 0; } From a0fa2316cce1bc23e788603d7a6d0a3db7b100d3 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 15 May 2025 14:40:02 -0700 Subject: [PATCH 21/44] ANDROID: ABI: Update pixel symbol list Adding the following symbols: - irq_check_status_bit - irq_get_percpu_devid_partition - irq_work_run - perf_aux_output_skip - this_cpu_has_cap Bug: 393467632 Change-Id: I8e9f34b6b40ec078586d175efb835a6898cbc4f1 Signed-off-by: Yabin Cui --- android/abi_gki_aarch64.stg | 42 +++++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_pixel | 5 +++++ 2 files changed, 47 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index fdead9fe8009..97585255cc27 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -313053,6 +313053,12 @@ function { return_type_id: 0x3e10b518 parameter_id: 0x6720d32f } +function { + id: 0x8c02526b + return_type_id: 0x6720d32f + parameter_id: 0x4585663f + parameter_id: 0x38fa32ef +} function { id: 0x8c19f874 return_type_id: 0x6720d32f @@ -337217,6 +337223,12 @@ function { parameter_id: 0x0d30b9c3 parameter_id: 0x15a30023 } +function { + id: 0x9fe48d4f + return_type_id: 0x6720d32f + parameter_id: 0x0aa6efc8 + parameter_id: 0x33756485 +} function { id: 0x9fe6297b return_type_id: 0x6720d32f @@ -383195,6 +383207,15 @@ elf_symbol { type_id: 0x8d53ba62 full_name: "irq_get_irqchip_state" } +elf_symbol { + id: 0xe9124d83 + name: "irq_get_percpu_devid_partition" + is_defined: true + symbol_type: FUNCTION + crc: 0x6cfe3b6f + type_id: 0x8c02526b + full_name: "irq_get_percpu_devid_partition" +} elf_symbol { id: 0x28fabc56 name: "irq_modify_status" @@ -393203,6 +393224,15 @@ elf_symbol { type_id: 0x107966af full_name: "perf_aux_output_flag" } +elf_symbol { + id: 0x4e76f69f + name: "perf_aux_output_skip" + is_defined: true + symbol_type: FUNCTION + crc: 0x5a8de386 + type_id: 0x9fe48d4f + full_name: 
"perf_aux_output_skip" +} elf_symbol { id: 0x84bf9f80 name: "perf_event_addr_filters_sync" @@ -408543,6 +408573,15 @@ elf_symbol { type_id: 0x95a406a6 full_name: "thermal_zone_unbind_cooling_device" } +elf_symbol { + id: 0xc20a8a50 + name: "this_cpu_has_cap" + is_defined: true + symbol_type: FUNCTION + crc: 0x46e67a71 + type_id: 0xefc5028b + full_name: "this_cpu_has_cap" +} elf_symbol { id: 0x68e98442 name: "thread_group_cputime_adjusted" @@ -423062,6 +423101,7 @@ interface { symbol_id: 0x2ed6bfeb symbol_id: 0xa9c80d6c symbol_id: 0xb1d265b1 + symbol_id: 0xe9124d83 symbol_id: 0x28fabc56 symbol_id: 0x68e07680 symbol_id: 0xcd991820 @@ -424174,6 +424214,7 @@ interface { symbol_id: 0xf9e83d36 symbol_id: 0x80bebca0 symbol_id: 0xe207c73b + symbol_id: 0x4e76f69f symbol_id: 0x84bf9f80 symbol_id: 0xea4938d9 symbol_id: 0xdc02a166 @@ -425879,6 +425920,7 @@ interface { symbol_id: 0x66782435 symbol_id: 0x793a755b symbol_id: 0x61ab1273 + symbol_id: 0xc20a8a50 symbol_id: 0x68e98442 symbol_id: 0x1cf36c3c symbol_id: 0x9fc8421c diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index 8c48602bccf2..5b8c7b54bde4 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -1198,6 +1198,7 @@ __ipv6_addr_type __irq_alloc_descs __irq_apply_affinity_hint + irq_check_status_bit irq_create_mapping_affinity irq_create_of_mapping __irq_domain_add @@ -1208,6 +1209,7 @@ irq_domain_xlate_twocell irq_force_affinity irq_get_irq_data + irq_get_percpu_devid_partition irq_modify_status irq_of_parse_and_map __irq_resolve_mapping @@ -1219,6 +1221,7 @@ irq_set_irq_wake irq_to_desc irq_work_queue + irq_work_run irq_work_sync is_vmalloc_addr jiffies @@ -1651,6 +1654,7 @@ perf_aux_output_begin perf_aux_output_end perf_aux_output_flag + perf_aux_output_skip perf_event_addr_filters_sync perf_event_create_kernel_counter perf_event_disable @@ -2337,6 +2341,7 @@ thermal_zone_device_update thermal_zone_get_temp thermal_zone_get_zone_by_name + this_cpu_has_cap 
thread_group_cputime_adjusted tick_nohz_get_idle_calls_cpu time64_to_tm From 3e7cb920f1676ddf15f979d5b32e111384f7b46c Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 7 Apr 2025 13:24:07 -0700 Subject: [PATCH 22/44] UPSTREAM: net_sched: sch_sfq: use a temporary work area for validating configuration [ Upstream commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 ] Many configuration parameters have influence on others (e.g. divisor -> flows -> limit, depth -> limit) and so it is difficult to correctly do all of the validation before applying the configuration. And if a validation error is detected late it is difficult to roll back a partially applied configuration. To avoid these issues use a temporary work area to update and validate the configuration and only then apply the configuration to the internal state. Bug: 413623519 Signed-off-by: Octavian Purdila Acked-by: Cong Wang Signed-off-by: David S. Miller Stable-dep-of: b3bf8f63e617 ("net_sched: sch_sfq: move the limit validation") Signed-off-by: Sasha Levin (cherry picked from commit 70449ca40609ec77f58b93ed154d54e1fdb197b6) Signed-off-by: Lee Jones Change-Id: Icab9dc62eddd23f6a2c5d06dd1f8457294716fb8 --- net/sched/sch_sfq.c | 56 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 60754f366ab7..68e909e8fabd 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, struct red_parms *p = NULL; struct sk_buff *to_free = NULL; struct sk_buff *tail = NULL; + unsigned int maxflows; + unsigned int quantum; + unsigned int divisor; + int perturb_period; + u8 headdrop; + u8 maxdepth; + int limit; + u8 flags; + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; @@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, NL_SET_ERR_MSG_MOD(extack, "invalid limit"); return -EINVAL; } + 
sch_tree_lock(sch); + + limit = q->limit; + divisor = q->divisor; + headdrop = q->headdrop; + maxdepth = q->maxdepth; + maxflows = q->maxflows; + perturb_period = q->perturb_period; + quantum = q->quantum; + flags = q->flags; + + /* update and validate configuration */ if (ctl->quantum) - q->quantum = ctl->quantum; - WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + quantum = ctl->quantum; + perturb_period = ctl->perturb_period * HZ; if (ctl->flows) - q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { - q->divisor = ctl->divisor; - q->maxflows = min_t(u32, q->maxflows, q->divisor); + divisor = ctl->divisor; + maxflows = min_t(u32, maxflows, divisor); } if (ctl_v1) { if (ctl_v1->depth) - q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); if (p) { - swap(q->red_parms, p); - red_set_parms(q->red_parms, + red_set_parms(p, ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Plog, ctl_v1->Scell_log, NULL, ctl_v1->max_P); } - q->flags = ctl_v1->flags; - q->headdrop = ctl_v1->headdrop; + flags = ctl_v1->flags; + headdrop = ctl_v1->headdrop; } if (ctl->limit) { - q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); - q->maxflows = min_t(u32, q->maxflows, q->limit); + limit = min_t(u32, ctl->limit, maxdepth * maxflows); + maxflows = min_t(u32, maxflows, limit); } + /* commit configuration */ + q->limit = limit; + q->divisor = divisor; + q->headdrop = headdrop; + q->maxdepth = maxdepth; + q->maxflows = maxflows; + WRITE_ONCE(q->perturb_period, perturb_period); + q->quantum = quantum; + q->flags = flags; + if (p) + swap(q->red_parms, p); + qlen = sch->q.qlen; while (sch->q.qlen > q->limit) { dropped += sfq_drop(sch, &to_free); From 228e0f23bdeb774233bafc0383084518ec6f2ad1 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 7 Apr 2025 13:24:08 -0700 Subject: [PATCH 23/44] UPSTREAM: net_sched: sch_sfq: move the limit 
validation [ Upstream commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 ] It is not sufficient to directly validate the limit on the data that the user passes as it can be updated based on how the other parameters are changed. Move the check at the end of the configuration update process to also catch scenarios where the limit is indirectly updated, for example with the following configurations: tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1 tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1 This fixes the following syzkaller reported crash: ------------[ cut here ]------------ UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6 index 65535 is out of range for type 'struct sfq_head[128]' CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:231 [inline] __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429 sfq_link net/sched/sch_sfq.c:203 [inline] sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231 sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493 sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518 qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339 qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311 netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline] dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375 Bug: 413623519 Reported-by: syzbot Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit") Signed-off-by: Octavian Purdila Acked-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin (cherry picked from commit f86293adce0c201cfabb283ef9d6f21292089bb8) Signed-off-by: Lee Jones Change-Id: Ie5fc222b52c59eaa1070cc03402f8a624af60cd9 --- net/sched/sch_sfq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 68e909e8fabd..002941d35b64 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, if (!p) return -ENOMEM; } - if (ctl->limit == 1) { - NL_SET_ERR_MSG_MOD(extack, "invalid limit"); - return -EINVAL; - } sch_tree_lock(sch); @@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, limit = min_t(u32, ctl->limit, maxdepth * maxflows); maxflows = min_t(u32, maxflows, limit); } + if (limit == 1) { + sch_tree_unlock(sch); + kfree(p); + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; + } /* commit configuration */ q->limit = limit; From e30317e116ff3009a12abaa710008ff88f168188 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 19 May 2025 11:10:04 -0700 Subject: [PATCH 24/44] ANDROID: 16K: Remove ELF padding entry from map_file ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbolization techniques use address ranges as reported in /proc/*/maps to infer the corresponding /proc/*/map_files/ entry. Per Daniel, this is done because the path in /proc/*/maps is problematic for at least two reasons: 1. The file could have been deleted from the file system (this is indicated with the (deleted) suffix), meaning that you can't actually open it through the "regular" file system. However, while the mapping is alive, the kernel keeps the inode accessible via the corresponding /proc/*/map_files entry, allowing for access after all. 2. It makes dealing with changed root and file system namespaces much more painful. 
The /proc/*/maps path is relative, and so now you need to concatenate paths etc. Accessing file through /proc/*/map_files just works (assuming necessary permissions), as the kernel redirects the request to the proper inode, irrespective of how it is exposed through the non-proc filesystem. Android extends ELF padding regions to be contiguously mapped in memory to mitigate increase in unreclaimable VMA slab memory usage. Commit 8c2a805a857914324b077708b45c31c2f20d02da [1] emulates the padding region of such extended mappings to be outputted as PROT_NONE [page size compat] entries from /proc/*/[s]maps. This breaks the use case of /proc/*/map_files/, as the ranges in /proc/*/map_files/ are the true ranges of the actual underlying VMA layout; while those in /proc/*/[s]maps are the emulated (shortened) ranges. Remove the padding (extended) ranges from /proc/*/map_files entries. ====== Example Output ====== === maps === ❯ adb shell cat /proc/1/maps | grep -A1 libdl_android.so | sed '$d' 7f76663df000-7f76663e0000 r--p 00000000 fe:09 1911 /system/lib64/bootstrap/libdl_android.so 7f76663e0000-7f76663e3000 ---p 00000000 00:00 0 [page size compat] 7f76663e3000-7f76663e4000 r-xp 00004000 fe:09 1911 /system/lib64/bootstrap/libdl_android.so 7f76663e4000-7f76663e7000 ---p 00000000 00:00 0 [page size compat] 7f76663e7000-7f76663e8000 r--p 00008000 fe:09 1911 /system/lib64/bootstrap/libdl_android.so === map_files - Before patch === ❯ adb shell ls /proc/1/map_files | grep -A2 7f76663df000 7f76663df000-7f76663e3000 7f76663e3000-7f76663e7000 7f76663e7000-7f76663e8000 === map_files - After patch === ❯ adb shell ls /proc/1/map_files | grep -A2 7f76663df000 7f76663df000-7f76663e0000 7f76663e3000-7f76663e4000 7f76663e7000-7f76663e8000 [1] https://android.googlesource.com/kernel/common/+/8c2a805a857914324b077708b45c31c2f20d02da Bug: 418042003 Change-Id: I0f6d703715a0e709fa1d4bd52241b5fd913dd55e Reported-by: Daniel Müller Signed-off-by: Kalesh Singh --- fs/proc/base.c | 3 ++- 1 file
changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 77b3b1efe43d..bf14f94df793 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -2476,7 +2477,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) } p->start = vma->vm_start; - p->end = vma->vm_end; + p->end = VMA_PAD_START(vma); p->mode = vma->vm_file->f_mode; } mmap_read_unlock(mm); From ad7902a401f68e107b74c3543650798f454740b2 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 9 May 2025 10:09:12 +1200 Subject: [PATCH 25/44] BACKPORT: mm: userfaultfd: correct dirty flags set for both present and swap pte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As David pointed out, what truly matters for mremap and userfaultfd move operations is the soft dirty bit. The current comment and implementation—which always sets the dirty bit for present PTEs and fails to set the soft dirty bit for swap PTEs—are incorrect. This could break features like Checkpoint-Restore in Userspace (CRIU). This patch updates the behavior to correctly set the soft dirty bit for both present and swap PTEs in accordance with mremap. Link: https://lkml.kernel.org/r/20250508220912.7275-1-21cnbao@gmail.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Barry Song Reported-by: David Hildenbrand Closes: https://lore.kernel.org/linux-mm/02f14ee1-923f-47e3-a994-4950afb9afcc@redhat.com/ Acked-by: Peter Xu Reviewed-by: Suren Baghdasaryan Cc: Lokesh Gidra Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton (cherry picked from commit 75cb1cca2c880179a11c7dd9380b6f14e41a06a4) Merge Conflicts: 1. pte_mkwrite() doesn't take vma as second argument, so removed it. 
Change-Id: I5fc25f9028ad7972ea1b6d873f072fd15f9c7214 Signed-off-by: Lokesh Gidra --- mm/userfaultfd.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index b45edacc7436..468747538b41 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -966,8 +966,13 @@ static int move_present_pte(struct mm_struct *mm, WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr)); orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot); - /* Follow mremap() behavior and treat the entry dirty after the move */ - orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte)); + /* Set soft dirty bit so userspace can notice the pte was moved */ +#ifdef CONFIG_MEM_SOFT_DIRTY + orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); +#endif + if (pte_dirty(orig_src_pte)) + orig_dst_pte = pte_mkdirty(orig_dst_pte); + orig_dst_pte = pte_mkwrite(orig_dst_pte); set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte); out: @@ -1001,6 +1006,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); +#ifdef CONFIG_MEM_SOFT_DIRTY + orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); +#endif set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); From 58b3f63bc69f12b923960b99683f6af8527a82f7 Mon Sep 17 00:00:00 2001 From: Marcus Ma Date: Mon, 19 May 2025 21:04:39 +0800 Subject: [PATCH 26/44] ANDROID: vendor_hooks: Add hooks for pcp related optimization. We want to make some optimizations to the pcp buffer. First, during direct reclaim, we skip drain_all_pages when it is known that the pcp buffer is small to reduce zone->lock contention. In addition, the default pcp buffer size is still relatively small for mobile phones with large memory. We want to increase the pcp buffer area to reduce zone->lock contention.
Bug: 418695654 Change-Id: I38c7a3715500918d839e4363bbcc41cdbf4bd643 Signed-off-by: Marcus Ma --- drivers/android/vendor_hooks.c | 2 ++ include/trace/hooks/mm.h | 8 ++++++++ mm/page_alloc.c | 7 ++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 77dd1eb1b1f3..84184cc4a83e 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -481,3 +481,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_filemap_map_pages_range); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_vprintk_store); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_folio_referenced_check_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_calculate_totalreserve_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_drain_all_pages_bypass); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_pageset_update); diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index fdbdd8080370..0e0403625dbf 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -315,6 +315,14 @@ DECLARE_HOOK(android_vh_filemap_map_pages_range, DECLARE_HOOK(android_vh_calculate_totalreserve_pages, TP_PROTO(bool *skip), TP_ARGS(skip)); +DECLARE_HOOK(android_vh_drain_all_pages_bypass, + TP_PROTO(gfp_t gfp_mask, unsigned int order, unsigned long alloc_flags, + int migratetype, unsigned long did_some_progress, + bool *bypass), + TP_ARGS(gfp_mask, order, alloc_flags, migratetype, did_some_progress, bypass)); +DECLARE_HOOK(android_vh_pageset_update, + TP_PROTO(unsigned long *high, unsigned long *batch), + TP_ARGS(high, batch)); #endif /* _TRACE_HOOK_MM_H */ /* This part must be outside protection */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ffc6abc47849..a1605834867e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5026,6 +5026,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, struct page *page = NULL; unsigned long pflags; bool drained = false; + bool skip_pcp_drain = false; 
trace_android_vh_mm_alloc_pages_direct_reclaim_enter(order); psi_memstall_enter(&pflags); @@ -5043,7 +5044,10 @@ retry: */ if (!page && !drained) { unreserve_highatomic_pageblock(ac, false); - drain_all_pages(NULL); + trace_android_vh_drain_all_pages_bypass(gfp_mask, order, + alloc_flags, ac->migratetype, *did_some_progress, &skip_pcp_drain); + if (!skip_pcp_drain) + drain_all_pages(NULL); drained = true; ++retry_times; goto retry; @@ -7475,6 +7479,7 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online) static void pageset_update(struct per_cpu_pages *pcp, unsigned long high, unsigned long batch) { + trace_android_vh_pageset_update(&high, &batch); WRITE_ONCE(pcp->batch, batch); WRITE_ONCE(pcp->high, high); } From b07be5e5113bbf7168faf3876d3fff08fa4789ed Mon Sep 17 00:00:00 2001 From: Marcus Ma Date: Mon, 19 May 2025 22:34:49 +0800 Subject: [PATCH 27/44] ANDROID: GKI: update symbol list file for xiaomi add 2 function: trace_android_vh_drain_all_pages_bypass() trace_android_vh_pageset_update() Bug: 418695654 Change-Id: Id1bbb269b7650528dcb2dfac29e7a611154954b3 Signed-off-by: Marcus Ma --- android/abi_gki_aarch64.stg | 51 +++++++++++++++++++++++ android/abi_gki_aarch64_xiaomi | 5 +++ android/abi_gki_protected_exports_aarch64 | 2 +- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 97585255cc27..24898f7746c4 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -322851,6 +322851,17 @@ function { parameter_id: 0x4585663f parameter_id: 0x33756485 } +function { + id: 0x98789491 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0xf1a6dfed + parameter_id: 0x4585663f + parameter_id: 0x33756485 + parameter_id: 0x6720d32f + parameter_id: 0x33756485 + parameter_id: 0x11cfee5a +} function { id: 0x98792c3d return_type_id: 0x3e10b518 @@ -348507,6 +348518,15 @@ elf_symbol { type_id: 0x9b2a7922 full_name: "__traceiter_android_vh_do_wp_page" } 
+elf_symbol { + id: 0xf80eb64b + name: "__traceiter_android_vh_drain_all_pages_bypass" + is_defined: true + symbol_type: FUNCTION + crc: 0xe9b2635d + type_id: 0x98789491 + full_name: "__traceiter_android_vh_drain_all_pages_bypass" +} elf_symbol { id: 0x42312ccc name: "__traceiter_android_vh_dump_throttled_rt_tasks" @@ -349470,6 +349490,15 @@ elf_symbol { type_id: 0x9b26096d full_name: "__traceiter_android_vh_page_should_be_protected" } +elf_symbol { + id: 0x1fc96009 + name: "__traceiter_android_vh_pageset_update" + is_defined: true + symbol_type: FUNCTION + crc: 0xf5c57c7a + type_id: 0x9bb71cb9 + full_name: "__traceiter_android_vh_pageset_update" +} elf_symbol { id: 0x13b0736e name: "__traceiter_android_vh_percpu_rwsem_down_read" @@ -353610,6 +353639,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_do_wp_page" } +elf_symbol { + id: 0x8405c9a1 + name: "__tracepoint_android_vh_drain_all_pages_bypass" + is_defined: true + symbol_type: OBJECT + crc: 0x786ed430 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_drain_all_pages_bypass" +} elf_symbol { id: 0x988719fa name: "__tracepoint_android_vh_dump_throttled_rt_tasks" @@ -354573,6 +354611,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_vh_page_should_be_protected" } +elf_symbol { + id: 0x94cb1cab + name: "__tracepoint_android_vh_pageset_update" + is_defined: true + symbol_type: OBJECT + crc: 0x644520c9 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_vh_pageset_update" +} elf_symbol { id: 0xa4c454d8 name: "__tracepoint_android_vh_percpu_rwsem_down_read" @@ -419247,6 +419294,7 @@ interface { symbol_id: 0x54bc5972 symbol_id: 0x9dbd7b92 symbol_id: 0x2576f1c7 + symbol_id: 0xf80eb64b symbol_id: 0x42312ccc symbol_id: 0xf432d1c9 symbol_id: 0x02c8f91b @@ -419354,6 +419402,7 @@ interface { symbol_id: 0xacaadcc9 symbol_id: 0x3246acbb symbol_id: 0xb4d5ffdc + symbol_id: 0x1fc96009 symbol_id: 0x13b0736e symbol_id: 0xc72f2012 symbol_id: 0xd14f3adb @@ -419814,6 
+419863,7 @@ interface { symbol_id: 0xeb9f1c78 symbol_id: 0xe2d7542c symbol_id: 0x15374b6d + symbol_id: 0x8405c9a1 symbol_id: 0x988719fa symbol_id: 0x732a182b symbol_id: 0xe5deb919 @@ -419921,6 +419971,7 @@ interface { symbol_id: 0x20d2ceb3 symbol_id: 0x4a5e6e41 symbol_id: 0x352038ba + symbol_id: 0x94cb1cab symbol_id: 0xa4c454d8 symbol_id: 0x7d42b7c8 symbol_id: 0x3d63616d diff --git a/android/abi_gki_aarch64_xiaomi b/android/abi_gki_aarch64_xiaomi index da618470dc5f..2ebc76e2121d 100644 --- a/android/abi_gki_aarch64_xiaomi +++ b/android/abi_gki_aarch64_xiaomi @@ -524,3 +524,8 @@ __tracepoint_android_vh_filemap_map_pages_range #required by rtase.ko proc_get_parent_data netdev_stats_to_stats64 + +__traceiter_android_vh_drain_all_pages_bypass +__tracepoint_android_vh_drain_all_pages_bypass +__traceiter_android_vh_pageset_update +__tracepoint_android_vh_pageset_update diff --git a/android/abi_gki_protected_exports_aarch64 b/android/abi_gki_protected_exports_aarch64 index c9f44faef737..c2cc936669e5 100644 --- a/android/abi_gki_protected_exports_aarch64 +++ b/android/abi_gki_protected_exports_aarch64 @@ -351,4 +351,4 @@ wwan_port_txoff wwan_port_txon wwan_register_ops wwan_remove_port -wwan_unregister_ops +wwan_unregister_ops \ No newline at end of file From d653b32842d8960e6504293790ab29d25ed4fa88 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 12 May 2025 15:57:22 +0100 Subject: [PATCH 28/44] Revert "ANDROID: KVM: arm64: Use enum instead of helper for fp state" This reverts commit 26d24625b310b48b0d671075e02117624f7110d4, which didn't introduce any functional change. This is reverted because backported commits rely on the helpers that the commit has removed. Reverting it makes it easier and cleaner to apply the backports. No functional change intended. 
Bug: 411040189 Change-Id: Ie29ece274cfc970cf116f8781b841b9ac2c5aa56 Signed-off-by: Fuad Tabba --- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 89f7a56dac73..3999a372078a 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -37,6 +37,12 @@ struct kvm_exception_table_entry { extern struct kvm_exception_table_entry __start___kvm_ex_table; extern struct kvm_exception_table_entry __stop___kvm_ex_table; +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; +} + /* Save the 32-bit only FPSIMD system register state */ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 2363f862abc9..b96612fca21d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -45,7 +45,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = vcpu->arch.cptr_el2; val |= CPTR_EL2_TTA | CPTR_EL2_TAM; - if (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED) { + if (!guest_owns_fp_regs(vcpu)) { val |= CPTR_EL2_TFP | CPTR_EL2_TZ; __activate_traps_fpsimd32(vcpu); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 91211f86fec2..747e4eeaab59 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TAM; - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (guest_owns_fp_regs(vcpu)) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { From c3b505e78c572d582c22437ae10fb914662fffab Mon Sep 17 00:00:00 
2001 From: Fuad Tabba Date: Mon, 12 May 2025 16:17:40 +0100 Subject: [PATCH 29/44] ANDROID: KVM: arm64: Remove pkvm_set_max_sve_vq() This function doesn't encapsulate that much code, and removing it makes backporting SVE-fix patches easier and cleaner. No functional change intended. Bug: 411040189 Change-Id: I27b3fe467b1896a393751349b86771ddbb1bd62b Signed-off-by: Fuad Tabba --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 6 ------ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 3 ++- arch/arm64/kvm/hyp/nvhe/switch.c | 3 ++- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 943cf7fc7124..00526b8863e8 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -149,12 +149,6 @@ static inline bool pkvm_ipa_range_has_pvmfw(struct pkvm_hyp_vm *vm, return ipa_end > pkvm->pvmfw_load_addr && ipa_start < pvmfw_load_end; } -static inline void pkvm_set_max_sve_vq(void) -{ - sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, - SYS_ZCR_EL2); -} - int pkvm_load_pvmfw_pages(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t phys, u64 size); void pkvm_poison_pvmfw_pages(void); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 931152dc3aa8..ff993f0c8705 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -696,7 +696,8 @@ static void fpsimd_host_restore(void) struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR); - pkvm_set_max_sve_vq(); + sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, + SYS_ZCR_EL2); __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), &sve_state->fpsr); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index b96612fca21d..e75794c40e39 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -201,7 
+201,8 @@ static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR); - pkvm_set_max_sve_vq(); + sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, + SYS_ZCR_EL2); __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), &sve_state->fpsr); From 1b3dfc7c3845207142f928885d4dda598f4eac02 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Wed, 7 May 2025 12:40:11 +0100 Subject: [PATCH 30/44] ANDROID: KVM: arm64: Move kvm_hyp_handle_fpsimd_host() to switch.h Move kvm_hyp_handle_fpsimd_host() to the shared switch header, instead of having separate implementations in the vhe/nvhe switch.c files. Subsequent patches will remove all specific implementations from switch.c and include switch.h in other files. Bug: 411040189 Change-Id: I07f1d92f96b072435ded5f0b84a446df4e6a81ab Signed-off-by: Fuad Tabba --- arch/arm64/include/asm/kvm_hyp.h | 3 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 26 ++++++++++++++++++++++++- arch/arm64/kvm/hyp/nvhe/switch.c | 25 ------------------------ arch/arm64/kvm/hyp/vhe/switch.c | 5 ----- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 861049a6416f..5f16c6f800a6 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -121,6 +121,9 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); #ifdef __KVM_NVHE_HYPERVISOR__ struct user_fpsimd_state *get_host_fpsimd_state(struct kvm_vcpu *vcpu); struct kvm_host_sve_state *get_host_sve_state(struct kvm_vcpu *vcpu); +#else +#define get_host_fpsimd_state(vcpu) (vcpu)->arch.host_fpsimd_state +#define get_host_sve_state(vcpu) NULL #endif extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 3999a372078a..efcfd44e7012 100644 --- 
a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -167,7 +168,30 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } -static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu); +static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) +{ + /* + * Non-protected kvm relies on the host restoring its sve state. + * Protected kvm restores the host's sve state as not to reveal that + * fpsimd was used by a guest nor leak upper sve bits. + */ + if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { + struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); + + sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR); + sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, + SYS_ZCR_EL2); + __sve_save_state(sve_state->sve_regs + + sve_ffr_offset(kvm_host_sve_max_vl), + &sve_state->fpsr); + + /* Still trap SVE since it's handled by hyp in pKVM. */ + if (!vcpu_has_sve(vcpu)) + sysreg_clear_set(cptr_el2, 0, CPTR_EL2_TZ); + } else { + __fpsimd_save_state(get_host_fpsimd_state(vcpu)); + } +} static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e75794c40e39..8240ae1ebeb3 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -190,31 +190,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_handle_pvm_sysreg(vcpu, exit_code)); } -static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) -{ - /* - * Non-protected kvm relies on the host restoring its sve state. - * Protected kvm restores the host's sve state as not to reveal that - * fpsimd was used by a guest nor leak upper sve bits. 
- */ - if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { - struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); - - sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR); - sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, - SYS_ZCR_EL2); - __sve_save_state(sve_state->sve_regs + - sve_ffr_offset(kvm_host_sve_max_vl), - &sve_state->fpsr); - - /* Still trap SVE since it's handled by hyp in pKVM. */ - if (!vcpu_has_sve(vcpu)) - sysreg_clear_set(cptr_el2, 0, CPTR_EL2_TZ); - } else { - __fpsimd_save_state(get_host_fpsimd_state(vcpu)); - } -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 747e4eeaab59..b9c8cd61ec7a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -112,11 +112,6 @@ static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu) sysreg_clear_set(cpacr_el1, 0, reg); } -static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) -{ - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, From d871a6444c978a769e7b97c71e5b1a1ad843eacc Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 1 May 2025 13:21:50 +0100 Subject: [PATCH 31/44] ANDROID: KVM: arm64: Move __deactivate_fpsimd_traps() to switch.h Move __deactivate_fpsimd_traps() to the shared switch header, instead of having separate implementations in the vhe/nvhe switch.c files. Subsequent patches will remove all specific implementations from switch.c and include switch.h in other files. 
Bug: 411040189 Change-Id: I42c545e939b230366fbd9ad8e41a614193169bce Signed-off-by: Fuad Tabba --- arch/arm64/kvm/hyp/include/hyp/switch.h | 22 +++++++++++++++++++++- arch/arm64/kvm/hyp/nvhe/switch.c | 12 ------------ arch/arm64/kvm/hyp/vhe/switch.c | 10 ---------- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index efcfd44e7012..087a3cef4116 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -193,7 +193,27 @@ static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) } } -static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu); +static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu) +{ + if (has_vhe()) { + u64 reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; + + if (vcpu_has_sve(vcpu)) + reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; + + sysreg_clear_set(cpacr_el1, 0, reg); + + } else { + u64 reg = CPTR_EL2_TFP; + + if (vcpu_has_sve(vcpu) || + (is_protected_kvm_enabled() && system_supports_sve())) { + reg |= CPTR_EL2_TZ; + } + + sysreg_clear_set(cptr_el2, reg, 0); + } +} /* * We trap the first access to the FP/SIMD to save the host context and diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8240ae1ebeb3..f1890454628d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -108,18 +108,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(__kvm_hyp_host_vector, vbar_el2); } -static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu) -{ - u64 reg = CPTR_EL2_TFP; - - if (vcpu_has_sve(vcpu) || - (is_protected_kvm_enabled() && system_supports_sve())) { - reg |= CPTR_EL2_TZ; - } - - sysreg_clear_set(cptr_el2, reg, 0); -} - /* Save VGICv3 state on non-VHE systems */ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 
b9c8cd61ec7a..45ac4a59cc2c 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -102,16 +102,6 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) __deactivate_traps_common(vcpu); } -static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu) -{ - u64 reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; - - if (vcpu_has_sve(vcpu)) - reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; - - sysreg_clear_set(cpacr_el1, 0, reg); -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, From 21c687a8c532784d133469ba9eb5491271b1bd04 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 1 May 2025 10:21:31 +0100 Subject: [PATCH 32/44] ANDROID: KVM: arm64: Eagerly restore host FPSIMD/SVE state in pKVM Eagerly restore the host fpsimd/sve state after every vcpu run in protected mode if the fpsimd/sve unit was used by the guest, instead of setting fpsimd/sve traps and restoring if the host triggers them. Note that the behavior with this patch is the existing behavior in Android 16 (except for restoring ZCR_EL2, which is being fixed in conjunction with this patch there as well).
Bug: 411040189 Change-Id: I5702590331093937c1cd0d08ac754c634054c7f7 Signed-off-by: Fuad Tabba --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 100 +++++++++++------------------ 1 file changed, 38 insertions(+), 62 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index ff993f0c8705..09bd468dfadb 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +static void fpsimd_host_restore(struct kvm_vcpu *vcpu); + static bool (*default_host_smc_handler)(struct kvm_cpu_context *host_ctxt); static bool (*default_trap_handler)(struct kvm_cpu_context *host_ctxt); @@ -580,6 +582,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_entry_exit_handler_fn ec_handler; u8 esr_ec; + hyp_vcpu->vcpu.arch.fp_state = FP_STATE_HOST_OWNED; + /* * If we deal with a non-protected guest and the state is potentially * dirty (from a host perspective), copy the state back into the hyp @@ -666,50 +670,48 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, u32 exit_reason) else host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; + if (hyp_vcpu->vcpu.arch.fp_state != FP_STATE_HOST_OWNED) + fpsimd_host_restore(&hyp_vcpu->vcpu); + hyp_vcpu->exit_code = exit_reason; } -static void __hyp_sve_save_guest(struct pkvm_hyp_vcpu *hyp_vcpu) +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; - __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr); __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL1); } -static void fpsimd_host_restore(void) +static void fpsimd_host_restore(struct kvm_vcpu *vcpu) { - sysreg_clear_set(cptr_el2, CPTR_EL2_TZ | CPTR_EL2_TFP, 0); - isb(); - - if (unlikely(is_protected_kvm_enabled())) { - struct pkvm_hyp_vcpu 
*hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); - struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; - - if (vcpu_has_sve(vcpu)) - __hyp_sve_save_guest(hyp_vcpu); - else - __fpsimd_save_state(&hyp_vcpu->vcpu.arch.ctxt.fp_regs); - - if (system_supports_sve()) { - struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); - - write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR); - sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, - SYS_ZCR_EL2); - __sve_restore_state(sve_state->sve_regs + - sve_ffr_offset(kvm_host_sve_max_vl), - &sve_state->fpsr); - } else { - __fpsimd_restore_state(get_host_fpsimd_state(vcpu)); - } - - hyp_vcpu->vcpu.arch.fp_state = FP_STATE_HOST_OWNED; - } + u64 reg = CPTR_EL2_TFP; if (system_supports_sve()) - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + reg |= CPTR_EL2_TZ; + + sysreg_clear_set(cptr_el2, reg, 0); + isb(); + + if (vcpu_has_sve(vcpu)) + __hyp_sve_save_guest(vcpu); + else + __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs); + + if (system_supports_sve()) { + struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); + + write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR); + sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, + SYS_ZCR_EL2); + __sve_restore_state(sve_state->sve_regs + + sve_ffr_offset(kvm_host_sve_max_vl), + &sve_state->fpsr); + } else { + __fpsimd_restore_state(get_host_fpsimd_state(vcpu)); + } + + vcpu->arch.fp_state = FP_STATE_HOST_OWNED; } static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) @@ -740,8 +742,6 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) *last_ran = hyp_vcpu->vcpu.vcpu_id; } - hyp_vcpu->vcpu.arch.fp_state = FP_STATE_HOST_OWNED; - if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) { /* Propagate WFx trapping flags, trap ptrauth */ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI | @@ -761,9 +761,6 @@ static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt) if (hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; - if 
(hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED) - fpsimd_host_restore(); - if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu) && !vcpu_get_flag(host_vcpu, PKVM_HOST_STATE_DIRTY)) { __sync_hyp_vcpu(hyp_vcpu); @@ -784,9 +781,6 @@ static void handle___pkvm_vcpu_sync_state(struct kvm_cpu_context *host_ctxt) if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) return; - if (hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED) - fpsimd_host_restore(); - __sync_hyp_vcpu(hyp_vcpu); } @@ -849,23 +843,8 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) goto out; flush_hyp_vcpu(hyp_vcpu); - ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); - sync_hyp_vcpu(hyp_vcpu, ret); - - if (hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED) { - /* - * The guest has used the FP, trap all accesses - * from the host (both FP and SVE). - */ - u64 reg = CPTR_EL2_TFP; - - if (system_supports_sve()) - reg |= CPTR_EL2_TZ; - - sysreg_clear_set(cptr_el2, 0, reg); - } } else { /* The host is fully trusted, run its vCPU directly. 
*/ ret = __kvm_vcpu_run(host_vcpu); @@ -1383,13 +1362,8 @@ inval: static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(u64, func_id, host_ctxt, 0); - struct pkvm_hyp_vcpu *hyp_vcpu; bool handled; - hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); - if (hyp_vcpu && hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED) - fpsimd_host_restore(); - handled = kvm_host_psci_handler(host_ctxt); if (!handled) handled = kvm_host_ffa_handler(host_ctxt); @@ -1421,9 +1395,11 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; - case ESR_ELx_EC_FP_ASIMD: case ESR_ELx_EC_SVE: - fpsimd_host_restore(); + BUG_ON(is_protected_kvm_enabled()); + sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); + isb(); + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: From f1df93017ee768fda390c733eb15d0cb858e332e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Nov 2022 09:46:33 +0000 Subject: [PATCH 33/44] BACKPORT: KVM: arm64: Discard any SVE state when entering KVM guests [ Upstream commit 93ae6b01bafee8fa385aa25ee7ebdb40057f6abe ] Since 8383741ab2e773a99 (KVM: arm64: Get rid of host SVE tracking/saving) KVM has not tracked the host SVE state, relying on the fact that we currently disable SVE whenever we perform a syscall. This may not be true in future since performance optimisation may result in us keeping SVE enabled in order to avoid needing to take access traps to reenable it. Handle this by clearing TIF_SVE and converting the stored task state to FPSIMD format when preparing to run the guest. This is done with a new call fpsimd_kvm_prepare() to keep the direct state manipulation functions internal to fpsimd.c. 
Change-Id: Ie011c8f17dfebd82f796aaaa62d1502a3207c7db Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115094640.112848-2-broonie@kernel.org Signed-off-by: Will Deacon [ Mark: trivial backport to v6.1 ] Signed-off-by: Mark Rutland Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/kernel/fpsimd.c | 23 +++++++++++++++++++++++ arch/arm64/kvm/fpsimd.c | 3 ++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 930b0e6c9462..3544dfcc67a1 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -56,6 +56,7 @@ extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); +extern void fpsimd_kvm_prepare(void); extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl, diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 43afe07c74fd..1dc4254a99f2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1643,6 +1643,29 @@ void fpsimd_signal_preserve_current_state(void) sve_to_fpsimd(current); } +/* + * Called by KVM when entering the guest. + */ +void fpsimd_kvm_prepare(void) +{ + if (!system_supports_sve()) + return; + + /* + * KVM does not save host SVE state since we can only enter + * the guest from a syscall so the ABI means that only the + * non-saved SVE state needs to be saved. If we have left + * SVE enabled for performance reasons then update the task + * state to be FPSIMD only. 
+ */ + get_cpu_fpsimd_context(); + + if (test_and_clear_thread_flag(TIF_SVE)) + sve_to_fpsimd(current); + + put_cpu_fpsimd_context(); +} + /* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 453c6a541d87..a42d676466c5 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -52,11 +52,12 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) { BUG_ON(!current->mm); - BUG_ON(test_thread_flag(TIF_SVE)); if (!system_supports_fpsimd()) return; + fpsimd_kvm_prepare(); + vcpu->arch.fp_state = FP_STATE_HOST_OWNED; vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); From 12921b6e2348899199063e4e3e029e117f2b85b8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:19 +0000 Subject: [PATCH 34/44] BACKPORT: KVM: arm64: Unconditionally save+flush host FPSIMD/SVE/SME state [ Upstream commit fbc7e61195e23f744814e78524b73b59faa54ab4 ] There are several problems with the way hyp code lazily saves the host's FPSIMD/SVE state, including: * Host SVE being discarded unexpectedly due to inconsistent configuration of TIF_SVE and CPACR_ELx.ZEN. This has been seen to result in QEMU crashes where SVE is used by memmove(), as reported by Eric Auger: https://issues.redhat.com/browse/RHEL-68997 * Host SVE state is discarded *after* modification by ptrace, which was an unintentional ptrace ABI change introduced with lazy discarding of SVE state. * The host FPMR value can be discarded when running a non-protected VM, where FPMR support is not exposed to a VM, and that VM uses FPSIMD/SVE. In these cases the hyp code does not save the host's FPMR before unbinding the host's FPSIMD/SVE/SME state, leaving a stale value in memory. 
Avoid these by eagerly saving and "flushing" the host's FPSIMD/SVE/SME state when loading a vCPU such that KVM does not need to save any of the host's FPSIMD/SVE/SME state. For clarity, fpsimd_kvm_prepare() is removed and the necessary call to fpsimd_save_and_flush_cpu_state() is placed in kvm_arch_vcpu_load_fp(). As 'fpsimd_state' and 'fpmr_ptr' should not be used, they are set to NULL; all uses of these will be removed in subsequent patches. Historical problems go back at least as far as v5.17, e.g. erroneous assumptions about TIF_SVE being clear in commit: 8383741ab2e773a9 ("KVM: arm64: Get rid of host SVE tracking/saving") ... and so this eager save+flush probably needs to be backported to ALL stable trees. Bug: 411040189 Fixes: 93ae6b01bafee8fa ("KVM: arm64: Discard any SVE state when entering KVM guests") Fixes: 8c845e2731041f0f ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Fixes: ef3be86021c3bdf3 ("KVM: arm64: Add save/restore support for FPMR") Reported-by: Eric Auger Reported-by: Wilco Dijkstra Reviewed-by: Mark Brown Tested-by: Mark Brown Tested-by: Eric Auger Acked-by: Will Deacon Cc: Catalin Marinas Cc: Florian Weimer Cc: Fuad Tabba Cc: Jeremy Linton Cc: Marc Zyngier Cc: Oliver Upton Cc: Paolo Bonzini Change-Id: I2c230b8db86f5c68ebf24f06d1e4787da284c80d Signed-off-by: Mark Rutland Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-2-mark.rutland@arm.com Signed-off-by: Marc Zyngier [ Mark: Handle vcpu/host flag conflict, remove host_data_ptr() ] Signed-off-by: Mark Rutland Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/kernel/fpsimd.c | 23 ----------------------- arch/arm64/kvm/fpsimd.c | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1dc4254a99f2..43afe07c74fd 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1643,29 
+1643,6 @@ void fpsimd_signal_preserve_current_state(void) sve_to_fpsimd(current); } -/* - * Called by KVM when entering the guest. - */ -void fpsimd_kvm_prepare(void) -{ - if (!system_supports_sve()) - return; - - /* - * KVM does not save host SVE state since we can only enter - * the guest from a syscall so the ABI means that only the - * non-saved SVE state needs to be saved. If we have left - * SVE enabled for performance reasons then update the task - * state to be FPSIMD only. - */ - get_cpu_fpsimd_context(); - - if (test_and_clear_thread_flag(TIF_SVE)) - sve_to_fpsimd(current); - - put_cpu_fpsimd_context(); -} - /* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index a42d676466c5..43bec87ee74d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -56,9 +56,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (!system_supports_fpsimd()) return; - fpsimd_kvm_prepare(); - - vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + /* + * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such + * that the host kernel is responsible for restoring this state upon + * return to userspace, and the hyp code doesn't need to save anything. + * + * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures + * that PSTATE.{SM,ZA} == {0,0}. 
+ */ + fpsimd_save_and_flush_cpu_state(); + vcpu->arch.fp_state = FP_STATE_FREE; vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) @@ -77,11 +84,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) vcpu_clear_flag(vcpu, HOST_SME_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) vcpu_set_flag(vcpu, HOST_SME_ENABLED); - - if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - vcpu->arch.fp_state = FP_STATE_FREE; - fpsimd_save_and_flush_cpu_state(); - } } } From a08391468f2fcc251ecac861d59a904a65018d64 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:20 +0000 Subject: [PATCH 35/44] BACKPORT: KVM: arm64: Remove host FPSIMD saving for non-protected KVM [ Upstream commit 8eca7f6d5100b6997df4f532090bc3f7e0203bef ] Now that the host eagerly saves its own FPSIMD/SVE/SME state, non-protected KVM never needs to save the host FPSIMD/SVE/SME state, and the code to do this is never used. Protected KVM still needs to save/restore the host FPSIMD/SVE state to avoid leaking guest state to the host (and to avoid revealing to the host whether the guest used FPSIMD/SVE/SME), and that code needs to be retained. Remove the unused code and data structures. To avoid the need for a stub copy of kvm_hyp_save_fpsimd_host() in the VHE hyp code, the nVHE/hVHE version is moved into the shared switch header, where it is only invoked when KVM is in protected mode. [tabba@: Kept the host_fpsimd_state field (a struct user_fpsimd_state pointer) so as not to break the KMI.]
Bug: 411040189 Change-Id: I0088db7c5f75c9331956867040b8eb69976aabf8 Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-3-mark.rutland@arm.com Signed-off-by: Marc Zyngier Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/include/asm/kvm_host.h | 3 ++- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/fpsimd.c | 2 -- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 --- 5 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1ad2a045f8e7..b894dc38e205 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -488,7 +488,8 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ + /* DO NOT USE: Removed upstream. Kept to not break the KMI. 
*/ + struct user_fpsimd_state *host_fpsimd_state; struct { /* {Break,watch}point registers */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 5f16c6f800a6..b0ed038ff4e9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -122,7 +122,7 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); struct user_fpsimd_state *get_host_fpsimd_state(struct kvm_vcpu *vcpu); struct kvm_host_sve_state *get_host_sve_state(struct kvm_vcpu *vcpu); #else -#define get_host_fpsimd_state(vcpu) (vcpu)->arch.host_fpsimd_state +#define get_host_fpsimd_state(vcpu) NULL #define get_host_sve_state(vcpu) NULL #endif diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 43bec87ee74d..552434b96595 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -37,8 +37,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) if (ret) return ret; - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); - return 0; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 087a3cef4116..221d2b72a513 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -175,7 +175,7 @@ static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) * Protected kvm restores the host's sve state as not to reveal that * fpsimd was used by a guest nor leak upper sve bits. 
*/ - if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { + if (system_supports_sve()) { struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu); sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR); @@ -243,7 +243,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) + if (is_protected_kvm_enabled() && vcpu->arch.fp_state == FP_STATE_HOST_OWNED) kvm_hyp_handle_fpsimd_host(vcpu); /* Restore the guest state */ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index b3acb60a2d6c..85227166252a 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -54,9 +54,6 @@ static void *__get_host_fpsimd_bytes(void) struct user_fpsimd_state *get_host_fpsimd_state(struct kvm_vcpu *vcpu) { - if (likely(!is_protected_kvm_enabled())) - return vcpu->arch.host_fpsimd_state; - WARN_ON(system_supports_sve()); return __get_host_fpsimd_bytes(); } From c952e23cf8dfc18357b9e2300b09e75807d13ef0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:21 +0000 Subject: [PATCH 36/44] BACKPORT: KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN [ Upstream commit 459f059be702056d91537b99a129994aa6ccdd35 ] When KVM is in VHE mode, the host kernel tries to save and restore the configuration of CPACR_EL1.ZEN (i.e. CPTR_EL2.ZEN when HCR_EL2.E2H=1) across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the configuration may be clobbered by hyp when running a vCPU. This logic is currently redundant. The VHE hyp code unconditionally configures CPTR_EL2.ZEN to 0b01 when returning to the host, permitting host kernel usage of SVE. Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME state, there's no need to save/restore the state of the EL0 SVE trap. 
The kernel can safely save/restore state without trapping, as described above, and will restore userspace state (including trap controls) before returning to userspace. Remove the redundant logic. Bug: 411040189 Change-Id: I43bf5587223aae54caf9389eb3de17f155043d96 Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-4-mark.rutland@arm.com Signed-off-by: Marc Zyngier [Rework for refactoring of where the flags are stored -- broonie] Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/kvm/fpsimd.c | 16 ---------------- 2 files changed, 18 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b894dc38e205..c414593343bf 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -693,8 +693,6 @@ struct kvm_vcpu_arch { /* pKVM host vcpu state is dirty, needs resync */ #define PKVM_HOST_STATE_DIRTY __vcpu_single_flag(iflags, BIT(7)) -/* SVE enabled for host EL0 */ -#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) /* SME enabled for EL0 */ #define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1)) /* Physical CPU not in supported_cpus */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 552434b96595..b4b3b9031543 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -65,10 +65,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) fpsimd_save_and_flush_cpu_state(); vcpu->arch.fp_state = FP_STATE_FREE; - vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); - if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) - vcpu_set_flag(vcpu, HOST_SVE_ENABLED); - /* * We don't currently support SME guests but if we leave * things in streaming mode then when the guest starts running @@ -174,18 +170,6 
@@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) } fpsimd_save_and_flush_cpu_state(); - } else if (has_vhe() && system_supports_sve()) { - /* - * The FPSIMD/SVE state in the CPU has not been touched, and we - * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been - * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE - * for EL0. To avoid spurious traps, restore the trap state - * seen by kvm_arch_vcpu_load_fp(): - */ - if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); - else - sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } update_thread_flag(TIF_SVE, 0); From c00c44bea22d5c0ecc2f9663a461e774b3d90eae Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:22 +0000 Subject: [PATCH 37/44] BACKPORT: KVM: arm64: Remove VHE host restore of CPACR_EL1.SMEN [ Upstream commit 407a99c4654e8ea65393f412c421a55cac539f5b ] When KVM is in VHE mode, the host kernel tries to save and restore the configuration of CPACR_EL1.SMEN (i.e. CPTR_EL2.SMEN when HCR_EL2.E2H=1) across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the configuration may be clobbered by hyp when running a vCPU. This logic has historically been broken, and is currently redundant. This logic was originally introduced in commit: 861262ab86270206 ("KVM: arm64: Handle SME host state when running guests") At the time, the VHE hyp code would reset CPTR_EL2.SMEN to 0b00 when returning to the host, trapping host access to SME state. Unfortunately, this was unsafe as the host could take a softirq before calling kvm_arch_vcpu_put_fp(), and if a softirq handler were to use kernel mode NEON the resulting attempt to save the live FPSIMD/SVE/SME state would result in a fatal trap. That issue was limited to VHE mode. For nVHE/hVHE modes, KVM always saved/restored the host kernel's CPACR_EL1 value, and configured CPTR_EL2.TSM to 0b0, ensuring that host usage of SME would not be trapped. 
The issue above was incidentally fixed by commit: 375110ab51dec5dc ("KVM: arm64: Fix resetting SME trap values on reset for (h)VHE") That commit changed the VHE hyp code to configure CPTR_EL2.SMEN to 0b01 when returning to the host, permitting host kernel usage of SME, avoiding the issue described above. At the time, this was not identified as a fix for commit 861262ab86270206. Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME state, there's no need to save/restore the state of the EL0 SME trap. The kernel can safely save/restore state without trapping, as described above, and will restore userspace state (including trap controls) before returning to userspace. Remove the redundant logic. Bug: 411040189 Change-Id: Ia2fbb22a21da8e63f0a3b9a76d47ee2c987e2fa5 Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-5-mark.rutland@arm.com Signed-off-by: Marc Zyngier [Update for rework of flags storage -- broonie] Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/kvm/fpsimd.c | 31 ------------------------------- 2 files changed, 33 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c414593343bf..c5b392b18401 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -693,8 +693,6 @@ struct kvm_vcpu_arch { /* pKVM host vcpu state is dirty, needs resync */ #define PKVM_HOST_STATE_DIRTY __vcpu_single_flag(iflags, BIT(7)) -/* SME enabled for EL0 */ -#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1)) /* Physical CPU not in supported_cpus */ #define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2)) /* WFIT instruction trapped */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c 
index b4b3b9031543..5e7078d44713 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -64,21 +64,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) */ fpsimd_save_and_flush_cpu_state(); vcpu->arch.fp_state = FP_STATE_FREE; - - /* - * We don't currently support SME guests but if we leave - * things in streaming mode then when the guest starts running - * FPSIMD or SVE code it may generate SME traps so as a - * special case if we are in streaming mode we force the host - * state to be saved now and exit streaming mode so that we - * don't have to handle any SME traps for valid guest - * operations. Do this for ZA as well for now for simplicity. - */ - if (system_supports_sme()) { - vcpu_clear_flag(vcpu, HOST_SME_ENABLED); - if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) - vcpu_set_flag(vcpu, HOST_SME_ENABLED); - } } /* @@ -132,22 +117,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); - /* - * If we have VHE then the Hyp code will reset CPACR_EL1 to - * CPACR_EL1_DEFAULT and we need to reenable SME. - */ - if (has_vhe() && system_supports_sme()) { - /* Also restore EL0 state seen on entry */ - if (vcpu_get_flag(vcpu, HOST_SME_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, - CPACR_EL1_SMEN_EL0EN | - CPACR_EL1_SMEN_EL1EN); - else - sysreg_clear_set(CPACR_EL1, - CPACR_EL1_SMEN_EL0EN, - CPACR_EL1_SMEN_EL1EN); - } - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); From b9b8d84f6cfc459941e2dd32eed7b92e3a3af6a9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:24 +0000 Subject: [PATCH 38/44] BACKPORT: KVM: arm64: Refactor exit handlers [ Upstream commit 9b66195063c5a145843547b1d692bd189be85287 ] The hyp exit handling logic is largely shared between VHE and nVHE/hVHE, with common logic in arch/arm64/kvm/hyp/include/hyp/switch.h. 
The code in the header depends on function definitions provided by arch/arm64/kvm/hyp/vhe/switch.c and arch/arm64/kvm/hyp/nvhe/switch.c when they include the header. This is an unusual header dependency, and prevents the use of arch/arm64/kvm/hyp/include/hyp/switch.h in other files as this would result in compiler warnings regarding missing definitions, e.g. | In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8: | ./arch/arm64/kvm/hyp/include/hyp/switch.h:733:31: warning: 'kvm_get_exit_handler_array' used but never defined | 733 | static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); | | ^~~~~~~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:735:13: warning: 'early_exit_filter' used but never defined | 735 | static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); | | ^~~~~~~~~~~~~~~~~ Refactor the logic such that the header doesn't depend on anything from the C files. There should be no functional change as a result of this patch. 
Bug: 411040189 Change-Id: I4e58bad80763afd73fd03f9653ed4e66dfe97255 Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-7-mark.rutland@arm.com Signed-off-by: Marc Zyngier Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/kvm/hyp/include/hyp/switch.h | 30 +++++------------------- arch/arm64/kvm/hyp/nvhe/switch.c | 31 ++++++++++++++----------- arch/arm64/kvm/hyp/vhe/switch.c | 8 +++---- 3 files changed, 27 insertions(+), 42 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 221d2b72a513..182983c308b8 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -437,23 +437,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); - -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); - /* * Allow the hypervisor to handle the exit with an exit handler if it has one. * * Returns true if the hypervisor handled the exit, and control should go back * to the guest, or false if it hasn't. 
*/ -static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); - exit_handler_fn fn; - - fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; - + exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; if (fn) return fn(vcpu, exit_code); @@ -483,20 +476,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code * the guest, false when we should restore the host state and return to the * main run loop. */ -static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - /* - * Save PSTATE early so that we can evaluate the vcpu mode - * early on. - */ - synchronize_vcpu_pstate(vcpu, exit_code); - - /* - * Check whether we want to repaint the state one way or - * another. - */ - early_exit_filter(vcpu, exit_code); - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); @@ -526,7 +508,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) goto exit; /* Check if there's an exit handler and allow it to handle the exit. */ - if (kvm_hyp_handle_exit(vcpu, exit_code)) + if (kvm_hyp_handle_exit(vcpu, exit_code, handlers)) goto guest; exit: /* Return to the host kernel and handle the exit */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f1890454628d..cf7c3be0e620 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -211,20 +211,23 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) return hyp_exit_handlers; } -/* - * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. 
- * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a - * guest from dropping to AArch32 EL0 if implemented by the CPU. If the - * hypervisor spots a guest in such a state ensure it is handled, and don't - * trust the host to spot or fix it. The check below is based on the one in - * kvm_arch_vcpu_ioctl_run(). - * - * Returns false if the guest ran in AArch32 when it shouldn't have, and - * thus should exit to the host, or true if a the guest run loop can continue. - */ -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + + synchronize_vcpu_pstate(vcpu, exit_code); + + /* + * Some guests (e.g., protected VMs) are not be allowed to run in + * AArch32. The ARMv8 architecture does not give the hypervisor a + * mechanism to prevent a guest from dropping to AArch32 EL0 if + * implemented by the CPU. If the hypervisor spots a guest in such a + * state ensure it is handled, and don't trust the host to spot or fix + * it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + */ + if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { /* * As we have caught the guest red-handed, decide that it isn't * fit for purpose anymore by making the vcpu invalid. 
The VMM @@ -236,6 +239,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; } + + return __fixup_guest_exit(vcpu, exit_code, handlers); } /* Switch to the guest for legacy non-VHE systems */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 45ac4a59cc2c..f24569ac26c2 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -114,13 +114,11 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - return hyp_exit_handlers; -} + synchronize_vcpu_pstate(vcpu, exit_code); -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) -{ + return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers); } /* Switch to the guest for VHE systems running in EL2 */ From 89720e9e1bc3138967c5b89e1bb2a6fe40602104 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:25 +0000 Subject: [PATCH 39/44] BACKPORT: KVM: arm64: Mark some header functions as inline [ Upstream commit f9dd00de1e53a47763dfad601635d18542c3836d ] The shared hyp switch header has a number of static functions which might not be used by all files that include the header, and when unused they will provoke compiler warnings, e.g. 
| In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8: | ./arch/arm64/kvm/hyp/include/hyp/switch.h:703:13: warning: 'kvm_hyp_handle_dabt_low' defined but not used [-Wunused-function] | 703 | static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:682:13: warning: 'kvm_hyp_handle_cp15_32' defined but not used [-Wunused-function] | 682 | static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:662:13: warning: 'kvm_hyp_handle_sysreg' defined but not used [-Wunused-function] | 662 | static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:458:13: warning: 'kvm_hyp_handle_fpsimd' defined but not used [-Wunused-function] | 458 | static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~~~ | ./arch/arm64/kvm/hyp/include/hyp/switch.h:329:13: warning: 'kvm_hyp_handle_mops' defined but not used [-Wunused-function] | 329 | static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) | | ^~~~~~~~~~~~~~~~~~~ Mark these functions as 'inline' to suppress this warning. This shouldn't result in any functional change. At the same time, avoid the use of __alias() in the header and alias kvm_hyp_handle_iabt_low() and kvm_hyp_handle_watchpt_low() to kvm_hyp_handle_memory_fault() using CPP, matching the style in the rest of the kernel. For consistency, kvm_hyp_handle_memory_fault() is also marked as 'inline'. 
Bug: 411040189 Change-Id: I5766401542afda440f737c1fee1810a73e89e86d Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Acked-by: Will Deacon Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-8-mark.rutland@arm.com Signed-off-by: Marc Zyngier Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/kvm/hyp/include/hyp/switch.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 182983c308b8..c1d4a74854e2 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -221,7 +221,7 @@ static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu) * If FP/SIMD is not implemented, handle the trap and inject an undefined * instruction exception to the guest. Similarly for trapped SVE accesses. 
*/ -static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { bool sve_guest; u8 esr_ec; @@ -370,7 +370,7 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } -static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && handle_tx2_tvm(vcpu)) @@ -386,7 +386,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) { if (static_branch_unlikely(&vgic_v3_cpuif_trap) && __vgic_v3_perform_cpuif_access(vcpu) == 1) @@ -395,19 +395,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, + u64 *exit_code) { if (!__populate_fault_info(vcpu)) return true; return false; } -static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); -static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); +#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault +#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault -static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) { if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) return true; From 6a31e426c64ea7be25090feb683be9964ddea94f Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 16 Dec 2024 10:50:52 +0000 Subject: [PATCH 40/44] BACKPORT: KVM: 
arm64: Calculate cptr_el2 traps on activating traps [ Upstream commit 2fd5b4b0e7b440602455b79977bfa64dea101e6c ] Similar to VHE, calculate the value of cptr_el2 from scratch on activate traps. This removes the need to store cptr_el2 in every vcpu structure. Moreover, some traps, such as whether the guest owns the fp registers, need to be set on every vcpu run. [tabba@ Kept cptr_el2 as to not break the KMI.] Bug: 411040189 Reported-by: James Clark Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch") Change-Id: Iba65e9bb65d8498007423dc5b137dedc602359de Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20241216105057.579031-13-tabba@google.com Signed-off-by: Marc Zyngier Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/arm.c | 1 - arch/arm64/kvm/hyp/nvhe/pkvm.c | 14 ------------ arch/arm64/kvm/hyp/nvhe/switch.c | 38 ++++++++++++++++++++++--------- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c5b392b18401..3fc67e4b1b60 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -438,6 +438,8 @@ struct kvm_vcpu_arch { /* Values of trap registers for the guest. */ u64 hcr_el2; u64 mdcr_el2; + + /* DO NOT USE: Removed upstream. Kept to not break the KMI. */ u64 cptr_el2; /* Values of trap registers for the host before guest entry. */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 915100a97191..4386e156c019 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1357,7 +1357,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, } vcpu_reset_hcr(vcpu); - vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT; /* * Handle the "start in power-off" case. 
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 85227166252a..ba09c7de6901 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -73,7 +73,6 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1); u64 hcr_set = HCR_RW; u64 hcr_clear = 0; - u64 cptr_set = 0; /* Protected KVM does not support AArch32 guests. */ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), @@ -100,16 +99,10 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) /* Trap AMU */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) { hcr_clear |= HCR_AMVOFFEN; - cptr_set |= CPTR_EL2_TAM; } - /* Trap SVE */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) - cptr_set |= CPTR_EL2_TZ; - vcpu->arch.hcr_el2 |= hcr_set; vcpu->arch.hcr_el2 &= ~hcr_clear; - vcpu->arch.cptr_el2 |= cptr_set; } /* @@ -139,7 +132,6 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1); u64 mdcr_set = 0; u64 mdcr_clear = 0; - u64 cptr_set = 0; /* Trap/constrain PMU */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) { @@ -166,13 +158,8 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids)) mdcr_set |= MDCR_EL2_TTRF; - /* Trap Trace */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) - cptr_set |= CPTR_EL2_TTA; - vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; - vcpu->arch.cptr_el2 |= cptr_set; } /* @@ -237,7 +224,6 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) */ static void pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) { - hyp_vcpu->vcpu.arch.cptr_el2 = CPTR_EL2_DEFAULT; hyp_vcpu->vcpu.arch.mdcr_el2 = 0; if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu)) { diff --git 
a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index cf7c3be0e620..1fc46f7a99f8 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -36,23 +36,39 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); -static void __activate_traps(struct kvm_vcpu *vcpu) +static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { - u64 val; + u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - ___activate_traps(vcpu); - __activate_traps_common(vcpu); + /* !hVHE case upstream */ + if (1) { + val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - val = vcpu->arch.cptr_el2; - val |= CPTR_EL2_TTA | CPTR_EL2_TAM; - if (!guest_owns_fp_regs(vcpu)) { - val |= CPTR_EL2_TFP | CPTR_EL2_TZ; - __activate_traps_fpsimd32(vcpu); - } - if (cpus_have_final_cap(ARM64_SME)) + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. + */ val |= CPTR_EL2_TSM; + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs(vcpu)) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs(vcpu)) + val |= CPTR_EL2_TFP; + } + + if (!guest_owns_fp_regs(vcpu)) + __activate_traps_fpsimd32(vcpu); + write_sysreg(val, cptr_el2); +} + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + __activate_cptr_traps(vcpu); + write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { From 785e577258509dc0a11b4eff53a85ef73ff88bf2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2025 19:52:26 +0000 Subject: [PATCH 41/44] BACKPORT: KVM: arm64: Eagerly switch ZCR_EL{1,2} [ Upstream commit 59419f10045bc955d2229819c7cf7a8b0b9c5b59 ] In non-protected KVM modes, while the guest FPSIMD/SVE/SME state is live on the CPU, the host's active SVE VL may differ from the guest's maximum SVE VL: * For VHE hosts, when a VM uses NV, ZCR_EL2 contains a value 
constrained by the guest hypervisor, which may be less than or equal to that guest's maximum VL. Note: in this case the value of ZCR_EL1 is immaterial due to E2H. * For nVHE/hVHE hosts, ZCR_EL1 contains a value written by the guest, which may be less than or greater than the guest's maximum VL. Note: in this case hyp code traps host SVE usage and lazily restores ZCR_EL2 to the host's maximum VL, which may be greater than the guest's maximum VL. This can be the case between exiting a guest and kvm_arch_vcpu_put_fp(). If a softirq is taken during this period and the softirq handler tries to use kernel-mode NEON, then the kernel will fail to save the guest's FPSIMD/SVE state, and will pend a SIGKILL for the current thread. This happens because kvm_arch_vcpu_ctxsync_fp() binds the guest's live FPSIMD/SVE state with the guest's maximum SVE VL, and fpsimd_save_user_state() verifies that the live SVE VL is as expected before attempting to save the register state: | if (WARN_ON(sve_get_vl() != vl)) { | force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); | return; | } Fix this and make this a bit easier to reason about by always eagerly switching ZCR_EL{1,2} at hyp during guest<->host transitions. With this happening, there's no need to trap host SVE usage, and the nVHE/hVHE __deactivate_cptr_traps() logic can be simplified to enable host access to all present FPSIMD/SVE/SME features. In protected nVHE/hVHE modes, the host's state is always saved/restored by hyp, and the guest's state is saved prior to exit to the host, so from the host's PoV the guest never has live FPSIMD/SVE/SME state, and the host's ZCR_EL1 is never clobbered by hyp.
Bug: 411040189 Change-Id: Ifecd5024230fadd0b73755587950ba651b94dae0 Fixes: 8c8010d69c132273 ("KVM: arm64: Save/restore SVE state for nVHE") Fixes: 2e3cf82063a00ea0 ("KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state") Signed-off-by: Mark Rutland Reviewed-by: Mark Brown Tested-by: Mark Brown Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250210195226.1215254-9-mark.rutland@arm.com Signed-off-by: Marc Zyngier [ v6.6 lacks pKVM saving of host SVE state, pull in discovery of maximum host VL separately -- broonie ] Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: Fuad Tabba --- arch/arm64/kvm/fpsimd.c | 30 +++++--------- arch/arm64/kvm/hyp/entry.S | 5 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 55 +++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ++-- arch/arm64/kvm/hyp/nvhe/switch.c | 30 +++++++++----- arch/arm64/kvm/hyp/vhe/switch.c | 4 ++ arch/arm64/kvm/reset.c | 1 + 7 files changed, 97 insertions(+), 37 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 5e7078d44713..ad716eadc4c8 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -118,26 +118,16 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { - if (vcpu_has_sve(vcpu)) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); - - /* - * Restore the VL that was saved when bound to the CPU, - * which is the maximum VL for the guest. Because - * the layout of the data when saving the sve state - * depends on the VL, we need to use a consistent VL. - * Note that this means that at guest exit ZCR_EL1 is - * not necessarily the same as on guest entry. - * - * Flushing the cpu state sets the TIF_FOREIGN_FPSTATE - * bit for the context, which lets the kernel restore - * the sve state, including ZCR_EL1 later. 
- */ - if (!has_vhe()) - sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, - SYS_ZCR_EL1); - } - + /* + * Flush (save and invalidate) the fpsimd/sve state so that if + * the host tries to use fpsimd/sve, it's not using stale data + * from the guest. + * + * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the + * context unconditionally, in both nVHE and VHE. This allows + * the kernel to restore the fpsimd/sve state, including ZCR_EL1 + * when needed. + */ fpsimd_save_and_flush_cpu_state(); } diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 435346ea1504..d8c94c45cb2f 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN alternative_else_nop_endif mrs x1, isr_el1 cbz x1, 1f + + // Ensure that __guest_enter() always provides a context + // synchronization event so that callers don't need ISBs for anything + // that would usually be synchronized by the ERET. + isb mov x0, #ARM_EXCEPTION_IRQ ret diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index c1d4a74854e2..33f6af14ba3b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -168,6 +168,61 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } +static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs(vcpu)) + return; + + if (vcpu_has_sve(vcpu)) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = __vcpu_sys_reg(vcpu, ZCR_EL1); + write_sysreg_el1(zcr_el1, SYS_ZCR); + } +} + +static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs(vcpu)) + return; + + /* + * When the guest owns the FP regs, we know that guest+hyp traps for + * any FPSIMD/SVE/SME features exposed to the guest
have been disabled + * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() + * prior to __guest_enter(). As __guest_enter() guarantees a context + * synchronization event, we don't need an ISB here to avoid taking + * traps for anything that was exposed to the guest. + */ + if (vcpu_has_sve(vcpu)) { + zcr_el1 = read_sysreg_el1(SYS_ZCR); + __vcpu_sys_reg(vcpu, ZCR_EL1) = zcr_el1; + + /* + * The guest's state is always saved using the guest's max VL. + * Ensure that the host has the guest's max VL active such that + * the host can save the guest's state lazily, but don't + * artificially restrict the host to the guest's max VL. + */ + if (has_vhe()) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + } else { + zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el1(zcr_el1, SYS_ZCR); + } + } +} + static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu) { /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 09bd468dfadb..1b5fdbfa6de8 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -847,7 +848,9 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) sync_hyp_vcpu(hyp_vcpu, ret); } else { /* The host is fully trusted, run its vCPU directly.
*/ + fpsimd_lazy_switch_to_guest(kern_hyp_va(host_vcpu)); ret = __kvm_vcpu_run(host_vcpu); + fpsimd_lazy_switch_to_host(kern_hyp_va(host_vcpu)); } out: cpu_reg(host_ctxt, 1) = ret; @@ -1395,12 +1398,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; - case ESR_ELx_EC_SVE: - BUG_ON(is_protected_kvm_enabled()); - sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); - isb(); - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); - break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1fc46f7a99f8..a496258d42e6 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -40,6 +40,9 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ + if (!guest_owns_fp_regs(vcpu)) + __activate_traps_fpsimd32(vcpu); + /* !hVHE case upstream */ if (1) { val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; @@ -55,12 +58,24 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs(vcpu)) val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); } +} - if (!guest_owns_fp_regs(vcpu)) - __activate_traps_fpsimd32(vcpu); +static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + /* !hVHE case upstream */ + if (1) { + u64 val = CPTR_NVHE_EL2_RES1; - write_sysreg(val, cptr_el2); + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); + } } static void __activate_traps(struct kvm_vcpu *vcpu) @@ -89,7 +104,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu) static void __deactivate_traps(struct kvm_vcpu *vcpu) { extern char __kvm_hyp_host_vector[]; - u64 cptr; ___deactivate_traps(vcpu); @@ -114,13 +128,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) 
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); - cptr = CPTR_EL2_DEFAULT; - if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) - cptr |= CPTR_EL2_TZ; - if (cpus_have_final_cap(ARM64_SME)) - cptr &= ~CPTR_EL2_TSM; - - write_sysreg(cptr, cptr_el2); + __deactivate_cptr_traps(vcpu); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index f24569ac26c2..179152bb9e42 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -134,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_save_host_state_vhe(host_ctxt); + fpsimd_lazy_switch_to_guest(vcpu); + /* * ARM erratum 1165522 requires us to configure both stage 1 and * stage 2 translation for the guest context before we clear @@ -164,6 +166,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); + fpsimd_lazy_switch_to_host(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 4b80f4e2b438..324b3338ab6b 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -40,6 +40,7 @@ int kvm_arm_init_sve(void) if (system_supports_sve()) { kvm_sve_max_vl = sve_max_virtualisable_vl(); kvm_host_sve_max_vl = sve_max_vl(); + kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl; /* * The get_sve_reg()/set_sve_reg() ioctl interface will need From 5b71d364254c468f68c6db5f449f8fcef42cb425 Mon Sep 17 00:00:00 2001 From: zhanghao56 Date: Tue, 13 May 2025 16:23:13 +0800 Subject: [PATCH 42/44] ANDROID: binder: fix minimum node priority comparison The "desired" priority for a transaction can be adjusted depending on various factors. For instance, it might be set to SCHED_NORMAL 120, when the caller is RT and the target node has !inherit_rt. 
However, instead of using these adjustments, the existing logic compares the minimum node priority against the original transaction priority. If the transaction priority is "higher", then the minimum node priority is ignored. This is particularly a problem when the "desired" priority has been changed to SCHED_NORMAL. This patch corrects the logic, comparing the minimum node priority against the (potentially adjusted) "desired" priority. This guarantees that the node's minimum priority is honored. Bug: 417382411 Cc: Martijn Coenen Fixes: c46810c23565 ("ANDROID: binder: add RT inheritance flag to node.") Change-Id: I813073241b996c1c38c29f20849b247023697102 Signed-off-by: zhanghao56 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 7811ced3ac50..877a7e357c8b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -842,8 +842,8 @@ static void binder_transaction_priority(struct binder_thread *thread, desired.sched_policy = SCHED_NORMAL; } - if (node_prio.prio < t->priority.prio || - (node_prio.prio == t->priority.prio && + if (node_prio.prio < desired.prio || + (node_prio.prio == desired.prio && node_prio.sched_policy == SCHED_FIFO)) { /* * In case the minimum priority on the node is From ed6999107ec6d6a8696b201f5cc71217e2f62a87 Mon Sep 17 00:00:00 2001 From: Chungkai Mei Date: Mon, 19 May 2025 03:12:34 +0000 Subject: [PATCH 43/44] ANDROID: vendor_hook: add trace_android_rvh_setscheduler_prio To modify priority of specific tasks, add the vendor hook in __setscheduler_prio Bug: 409176857 Change-Id: Id5a2309378f1a8c3ecc1de71c20f44f73b3f7557 Signed-off-by: Chungkai Mei --- include/trace/hooks/sched.h | 4 ++++ kernel/sched/core.c | 1 + kernel/sched/vendor_hooks.c | 1 + 3 files changed, 6 insertions(+) diff --git a/include/trace/hooks/sched.h b/include/trace/hooks/sched.h index ce26b58b9e3c..af49cae410e6 100644 --- 
a/include/trace/hooks/sched.h +++ b/include/trace/hooks/sched.h @@ -84,6 +84,10 @@ DECLARE_RESTRICTED_HOOK(android_rvh_setscheduler, TP_PROTO(struct task_struct *p), TP_ARGS(p), 1); +DECLARE_RESTRICTED_HOOK(android_rvh_setscheduler_prio, + TP_PROTO(struct task_struct *p), + TP_ARGS(p), 1); + struct sched_group; DECLARE_RESTRICTED_HOOK(android_rvh_find_busiest_group, TP_PROTO(struct sched_group *busiest, struct rq *dst_rq, int *out_balance), diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 64a231649cce..2174dd3ffb2a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7085,6 +7085,7 @@ static void __setscheduler_prio(struct task_struct *p, int prio) p->sched_class = &fair_sched_class; p->prio = prio; + trace_android_rvh_setscheduler_prio(p); } #ifdef CONFIG_RT_MUTEXES diff --git a/kernel/sched/vendor_hooks.c b/kernel/sched/vendor_hooks.c index 46a67d9b6344..89a4a4174cde 100644 --- a/kernel/sched/vendor_hooks.c +++ b/kernel/sched/vendor_hooks.c @@ -27,6 +27,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_rtmutex_prepare_setprio); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_set_user_nice); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_set_user_nice_locked); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_setscheduler); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_setscheduler_prio); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_find_busiest_group); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_dump_throttled_rt_tasks); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_jiffies_update); From 3c6d0251e1fb722e884184d964421fe6f0586534 Mon Sep 17 00:00:00 2001 From: Chungkai Mei Date: Tue, 20 May 2025 03:59:59 +0000 Subject: [PATCH 44/44] ANDROID: ABI: Update pixel symbol list Adding the following symbols: - param_ops_ullong - __traceiter_android_rvh_setscheduler_prio - __tracepoint_android_rvh_setscheduler_prio - usb_gadget_connect - usb_gadget_disconnect Bug: 409176857 Change-Id: I026c6a80ef4c31577bb2fc28b0b3d9e2e709a200 Signed-off-by: Chungkai Mei --- android/abi_gki_aarch64.stg | 20 
++++++++++++++++++++ android/abi_gki_aarch64_pixel | 9 +++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 24898f7746c4..f149d8523964 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -347330,6 +347330,15 @@ elf_symbol { type_id: 0x9bdbdcc4 full_name: "__traceiter_android_rvh_setscheduler" } +elf_symbol { + id: 0x1228e7e9 + name: "__traceiter_android_rvh_setscheduler_prio" + is_defined: true + symbol_type: FUNCTION + crc: 0x116cab3c + type_id: 0x9bdbdcc4 + full_name: "__traceiter_android_rvh_setscheduler_prio" +} elf_symbol { id: 0x73c83ef4 name: "__traceiter_android_rvh_shmem_get_folio" @@ -352451,6 +352460,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_setscheduler" } +elf_symbol { + id: 0x8a4070f7 + name: "__tracepoint_android_rvh_setscheduler_prio" + is_defined: true + symbol_type: OBJECT + crc: 0xa79bc306 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_setscheduler_prio" +} elf_symbol { id: 0x00b7ed82 name: "__tracepoint_android_rvh_shmem_get_folio" @@ -419162,6 +419180,7 @@ interface { symbol_id: 0x9b0cc890 symbol_id: 0x559e0725 symbol_id: 0xa01b20ce + symbol_id: 0x1228e7e9 symbol_id: 0x73c83ef4 symbol_id: 0x46515de8 symbol_id: 0x955e6fc1 @@ -419731,6 +419750,7 @@ interface { symbol_id: 0x42fff08e symbol_id: 0x74f29f73 symbol_id: 0xe48123a4 + symbol_id: 0x8a4070f7 symbol_id: 0x00b7ed82 symbol_id: 0xe8cacf26 symbol_id: 0xad588d93 diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index 5b8c7b54bde4..4678f40d85ef 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -1607,6 +1607,7 @@ param_ops_long param_ops_string param_ops_uint + param_ops_ullong param_ops_ulong param_set_copystring param_set_int @@ -2392,6 +2393,7 @@ __traceiter_android_rvh_set_cpus_allowed_by_task __traceiter_android_rvh_set_iowait __traceiter_android_rvh_setscheduler + 
__traceiter_android_rvh_setscheduler_prio __traceiter_android_rvh_set_task_cpu __traceiter_android_rvh_set_user_nice __traceiter_android_rvh_set_user_nice_locked @@ -2417,6 +2419,7 @@ __traceiter_android_vh_binder_proc_transaction_finish __traceiter_android_vh_binder_restore_priority __traceiter_android_vh_binder_set_priority + __traceiter_android_vh_calculate_totalreserve_pages __traceiter_android_vh_cpu_idle_enter __traceiter_android_vh_cpu_idle_exit __traceiter_android_vh_dump_throttled_rt_tasks @@ -2460,7 +2463,6 @@ __traceiter_android_vh_usb_dev_resume __traceiter_android_vh_use_amu_fie __traceiter_android_vh_vmscan_kswapd_done - __traceiter_android_vh_calculate_totalreserve_pages __traceiter_clock_set_rate __traceiter_cma_alloc_finish __traceiter_cma_alloc_start @@ -2533,6 +2535,7 @@ __tracepoint_android_rvh_set_cpus_allowed_by_task __tracepoint_android_rvh_set_iowait __tracepoint_android_rvh_setscheduler + __tracepoint_android_rvh_setscheduler_prio __tracepoint_android_rvh_set_task_cpu __tracepoint_android_rvh_set_user_nice __tracepoint_android_rvh_set_user_nice_locked @@ -2558,6 +2561,7 @@ __tracepoint_android_vh_binder_proc_transaction_finish __tracepoint_android_vh_binder_restore_priority __tracepoint_android_vh_binder_set_priority + __tracepoint_android_vh_calculate_totalreserve_pages __tracepoint_android_vh_cpu_idle_enter __tracepoint_android_vh_cpu_idle_exit __tracepoint_android_vh_dump_throttled_rt_tasks @@ -2601,7 +2605,6 @@ __tracepoint_android_vh_usb_dev_resume __tracepoint_android_vh_use_amu_fie __tracepoint_android_vh_vmscan_kswapd_done - __tracepoint_android_vh_calculate_totalreserve_pages __tracepoint_clock_set_rate __tracepoint_cma_alloc_finish __tracepoint_cma_alloc_start @@ -2737,7 +2740,9 @@ usb_function_register usb_function_unregister usb_gadget_activate + usb_gadget_connect usb_gadget_deactivate + usb_gadget_disconnect usb_gadget_set_state usb_gstrings_attach usb_hcd_is_primary_hcd