From 1c4f8fb026acba080e8aa98082ca636ef7eb671b Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 6 Mar 2025 15:23:54 -0800
Subject: [PATCH 01/44] UPSTREAM: net_sched: Prevent creation of classes with
 TC_H_ROOT

[ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ]

The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination
condition when traversing up the qdisc tree to update parent backlog
counters. However, if a class is created with classid TC_H_ROOT, the
traversal terminates prematurely at this class instead of reaching the
actual root qdisc, causing parent statistics to be incorrectly maintained.
In case of DRR, this could lead to a crash as reported by Mingi Cho.

Prevent the creation of any Qdisc class with classid TC_H_ROOT
(0xFFFFFFFF) across all qdisc types, as suggested by Jamal.

Bug: 403920173
Reported-by: Mingi Cho <mincho@theori.io>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop")
Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit 78533c4a29ac3aeddce4b481770beaaa4f3bfb67)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Ieac912ddc0bc44e999fe0d29ddf3a3842abdfa14

From 81ea45b132e6bcf969ce8005ed32e7b4bf89697d Mon Sep 17 00:00:00 2001
From: Bosser Ye <bo.ye@mediatek.com>
Date: Sun, 27 Apr 2025 10:51:29 +0800
Subject: [PATCH 02/44] ANDROID: GKI: Update symbol list for mtk

      1 function symbol(s) added
          'bool usb_check_int_endpoints(const struct usb_interface*, const u8*)'

Bug: 414032152
Change-Id: I74e2af13e5fcc7acd0ff060552f99485f5dda9f8
Signed-off-by: Bosser Ye <bo.ye@mediatek.com>
---
 android/abi_gki_aarch64.stg | 10 ++++++++++
 android/abi_gki_aarch64_mtk |  1 +
 2 files changed, 11 insertions(+)

diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg
index b50c2f469354..6c85b138870b 100644
--- a/android/abi_gki_aarch64.stg
+++ b/android/abi_gki_aarch64.stg
@@ -411986,6 +411986,15 @@ elf_symbol {
   type_id: 0xf38427c4
   full_name: "usb_check_bulk_endpoints"
 }
+elf_symbol {
+  id: 0xcf4d7b06
+  name: "usb_check_int_endpoints"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0x18bb04b2
+  type_id: 0xf38427c4
+  full_name: "usb_check_int_endpoints"
+}
 elf_symbol {
   id: 0x23a5ab99
   name: "usb_choose_configuration"
@@ -426159,6 +426168,7 @@ interface {
   symbol_id: 0x3d66dcb8
   symbol_id: 0x1f68a496
   symbol_id: 0x12289dad
+  symbol_id: 0xcf4d7b06
   symbol_id: 0x23a5ab99
   symbol_id: 0x2a589f64
   symbol_id: 0x7da41bc7
diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk
index 26727ae17c50..9bf16e696442 100644
--- a/android/abi_gki_aarch64_mtk
+++ b/android/abi_gki_aarch64_mtk
@@ -3229,6 +3229,7 @@
   usb_autopm_put_interface
   usb_autopm_put_interface_async
   usb_check_bulk_endpoints
+  usb_check_int_endpoints
   usb_clear_halt
   usb_composite_probe
   usb_composite_unregister

From 242f90b45e895bb0721721f486e2497054eeb0c1 Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev <dpiliaiev@igalia.com>
Date: Sun, 26 Mar 2023 09:38:13 -0700
Subject: [PATCH 03/44] UPSTREAM: drm/msm: Rename drm_msm_gem_submit_reloc::or
 in C++ code

Clashes with C++ `or` keyword

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/528751/
Link: https://lore.kernel.org/r/20230326163813.535762-1-robdclark@gmail.com
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>

Bug: 409896277
Change-Id: Ib8894a7facce6e4f80d586575b3cffaf53a22a1e
(cherry picked from commit f1af066bcfd38daa9eee7195ef772dadaaa18520)
Signed-off-by: Mahadevan <quic_mahap@quicinc.com>
---
 include/uapi/drm/msm_drm.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 3c7b097c4e3d..16f2eb4b65ee 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -180,7 +180,11 @@ struct drm_msm_gem_cpu_fini {
  */
 struct drm_msm_gem_submit_reloc {
 	__u32 submit_offset;  /* in, offset from submit_bo */
+#ifdef __cplusplus
+	__u32 _or;            /* in, value OR'd with result */
+#else
 	__u32 or;             /* in, value OR'd with result */
+#endif
 	__s32 shift;          /* in, amount of left shift (can be negative) */
 	__u32 reloc_idx;      /* in, index of reloc_bo buffer */
 	__u64 reloc_offset;   /* in, offset from start of reloc_bo */

From 0c1a07d9c284e99188696b78ddd6faae7b402ae6 Mon Sep 17 00:00:00 2001
From: Seiya Wang <seiya.wang@mediatek.com>
Date: Wed, 30 Apr 2025 16:22:12 +0800
Subject: [PATCH 04/44] ANDROID: GKI: Update the symbol list for mtk

1 function symbol(s) added
  'ssize_t hdmi_audio_infoframe_pack_for_dp(const struct hdmi_audio_infoframe*, struct dp_sdp*, u8)'

Bug: 414724747
Change-Id: I15c9372703d94a787df4e6af83551ca183d4fae9
Signed-off-by: Seiya Wang <seiya.wang@mediatek.com>
---
 android/abi_gki_aarch64.stg | 88 +++++++++++++++++++++++++++++++++++++
 android/abi_gki_aarch64_mtk |  1 +
 2 files changed, 89 insertions(+)

diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg
index 6c85b138870b..1ec9533fbf5c 100644
--- a/android/abi_gki_aarch64.stg
+++ b/android/abi_gki_aarch64.stg
@@ -24638,6 +24638,11 @@ pointer_reference {
   kind: POINTER
   pointee_type_id: 0xe5b47ef3
 }
+pointer_reference {
+  id: 0x340055b2
+  kind: POINTER
+  pointee_type_id: 0xfa41b054
+}
 pointer_reference {
   id: 0x34016e82
   kind: POINTER
@@ -26753,6 +26758,11 @@ pointer_reference {
   kind: POINTER
   pointee_type_id: 0xca69055f
 }
+pointer_reference {
+  id: 0x380af085
+  kind: POINTER
+  pointee_type_id: 0xca6b248b
+}
 pointer_reference {
   id: 0x380c33d1
   kind: POINTER
@@ -35938,6 +35948,11 @@ qualified {
   qualifier: CONST
   qualified_type_id: 0x98e32fbd
 }
+qualified {
+  id: 0xfa41b054
+  qualifier: CONST
+  qualified_type_id: 0x994177db
+}
 qualified {
   id: 0xfa455c97
   qualifier: CONST
@@ -41974,6 +41989,29 @@ member {
   type_id: 0x92233392
   offset: 1024
 }
+member {
+  id: 0x9583c91c
+  name: "HB0"
+  type_id: 0x295c7202
+}
+member {
+  id: 0xd444664c
+  name: "HB1"
+  type_id: 0x295c7202
+  offset: 8
+}
+member {
+  id: 0x1603fa52
+  name: "HB2"
+  type_id: 0x295c7202
+  offset: 16
+}
+member {
+  id: 0x56c4b67f
+  name: "HB3"
+  type_id: 0x295c7202
+  offset: 24
+}
 member {
   id: 0xc31a401f
   name: "InOctetsDecrypted"
@@ -73029,6 +73067,12 @@ member {
   type_id: 0x295c7202
   offset: 112
 }
+member {
+  id: 0xd75b166e
+  name: "db"
+  type_id: 0x5e9b9471
+  offset: 32
+}
 member {
   id: 0xf540eeb1
   name: "db_off"
@@ -176291,6 +176335,11 @@ member {
   type_id: 0xc9082b19
   offset: 10176
 }
+member {
+  id: 0x0fed75b2
+  name: "sdp_header"
+  type_id: 0x90206a5a
+}
 member {
   id: 0x682990f8
   name: "sdp_max"
@@ -230244,6 +230293,28 @@ struct_union {
     member_id: 0x24d1edc6
   }
 }
+struct_union {
+  id: 0xca6b248b
+  kind: STRUCT
+  name: "dp_sdp"
+  definition {
+    bytesize: 36
+    member_id: 0x0fed75b2
+    member_id: 0xd75b166e
+  }
+}
+struct_union {
+  id: 0x90206a5a
+  kind: STRUCT
+  name: "dp_sdp_header"
+  definition {
+    bytesize: 4
+    member_id: 0x9583c91c
+    member_id: 0xd444664c
+    member_id: 0x1603fa52
+    member_id: 0x56c4b67f
+  }
+}
 struct_union {
   id: 0x33fed362
   kind: STRUCT
@@ -301796,6 +301867,13 @@ function {
   parameter_id: 0x25653b02
   parameter_id: 0x35d17e4b
 }
+function {
+  id: 0x191b8833
+  return_type_id: 0xd5cc9c9a
+  parameter_id: 0x340055b2
+  parameter_id: 0x380af085
+  parameter_id: 0x295c7202
+}
 function {
   id: 0x191d871c
   return_type_id: 0x48b5725f
@@ -378871,6 +378949,15 @@ elf_symbol {
   type_id: 0x1f9e005b
   full_name: "hdmi_audio_infoframe_pack"
 }
+elf_symbol {
+  id: 0x6392ceb7
+  name: "hdmi_audio_infoframe_pack_for_dp"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0x7e0b255f
+  type_id: 0x191b8833
+  full_name: "hdmi_audio_infoframe_pack_for_dp"
+}
 elf_symbol {
   id: 0x0e1d2fa4
   name: "hdmi_avi_infoframe_check"
@@ -422488,6 +422575,7 @@ interface {
   symbol_id: 0x3163ad8e
   symbol_id: 0x78d85567
   symbol_id: 0xcd9a2048
+  symbol_id: 0x6392ceb7
   symbol_id: 0x0e1d2fa4
   symbol_id: 0x306e3b3d
   symbol_id: 0x684435da
diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk
index 9bf16e696442..238637837c11 100644
--- a/android/abi_gki_aarch64_mtk
+++ b/android/abi_gki_aarch64_mtk
@@ -1155,6 +1155,7 @@
   handle_sysrq
   have_governor_per_policy
   hci_cmd_sync_status
+  hdmi_audio_infoframe_pack_for_dp
   hex2bin
   hex_asc
   hex_asc_upper

From 7b89b57429c2ec59cc879a864d6c5480ef6f3606 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
Date: Tue, 11 Mar 2025 23:16:10 -0700
Subject: [PATCH 05/44] ANDROID: mm/memfd-ashmem-shim: Simplify buffer name
 retrieval

The current way of getting the name for a buffer always requires a
buffer to be allocated for the name to be copied into. This is
inefficient, as names for shmem buffers are always stored in the
same field, and they do not change.

Therefore, simplify the name retrieval to just read the buffer name
from the field it is always stored in for shmem buffers. This also
aligns the code to what is present on the android16-6.12 branch.

Bug: 401214613
Bug: 111903542
Change-Id: Idd7b2d16601c890b78bd5705c92842bee470e75c
Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
---
 mm/memfd-ashmem-shim.c | 57 +++++++++++-------------------------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/mm/memfd-ashmem-shim.c b/mm/memfd-ashmem-shim.c
index e09d95a8b274..258498cca9bb 100644
--- a/mm/memfd-ashmem-shim.c
+++ b/mm/memfd-ashmem-shim.c
@@ -16,58 +16,31 @@
 #include "memfd-ashmem-shim.h"
 #include "memfd-ashmem-shim-internal.h"
 
-/* file_path() returns the path of the file including the root, hence the additional "/". */
-#define MEMFD_PATH_PREFIX "/memfd:"
-#define MEMFD_PATH_PREFIX_LEN (sizeof(MEMFD_PATH_PREFIX) - 1)
+/* memfd file names all start with memfd: */
+#define MEMFD_PREFIX "memfd:"
+#define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX) - 1)
 
-/* All memfd files are unlinked, and are therefore suffixed with the " (deleted)" string. */
-#define UNLINKED_FILE_SUFFIX " (deleted)"
-#define UNLINKED_FILE_SUFFIX_LEN (sizeof(UNLINKED_FILE_SUFFIX) - 1)
-
-/*
- * 1 character for the start of the path (/), NAME_MAX for the maximum length of a full memfd file
- * name, UNLINKED_FILE_SUFFIX_LEN for the " (deleted)" suffix, and 1 for the NUL terminating
- * character.
- */
-#define MAX_FILE_PATH_SIZE (1 + NAME_MAX + UNLINKED_FILE_SUFFIX_LEN + 1)
-
-static char *get_memfd_file_name(struct file *file, char *buf, size_t size)
+static const char *get_memfd_name(struct file *file)
 {
-	char *name_end;
-	char *path = file_path(file, buf, size);
+	/* This pointer is always valid, so no need to check if it's NULL. */
+	const char *file_name = file->f_path.dentry->d_name.name;
 
-	if (IS_ERR(path))
-		return path;
+	if (file_name != strstr(file_name, MEMFD_PREFIX))
+		return NULL;
 
-	/* Only handle memfds; we cannot make assumptions about other file names. */
-	name_end = strstr(path, UNLINKED_FILE_SUFFIX);
-	if ((strstr(path, MEMFD_PATH_PREFIX) != path) || !name_end)
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * Since file_path() returns the full path of the file, including the root, the format will
-	 * be:
-	 *
-	 * "/memfd:testbuf (deleted)"
-	 *
-	 * But the ASHMEM_GET_NAME ioctl only returns the name of the buffer without any prefixes
-	 * or suffixes. So, terminate the string at the start of the " (deleted)" suffix so that
-	 * strlen() can be used on it from the start of the name.
-	 */
-	*name_end = '\0';
-
-	/* return a pointer to the start of the name */
-	return &path[MEMFD_PATH_PREFIX_LEN];
+	return file_name;
 }
 
 static long get_name(struct file *file, void __user *name)
 {
-	char buf[MAX_FILE_PATH_SIZE];
-	char *file_name = get_memfd_file_name(file, buf, sizeof(buf));
+	const char *file_name = get_memfd_name(file);
 	size_t len;
 
-	if (IS_ERR(file_name))
-		return PTR_ERR(file_name);
+	if (!file_name)
+		return -EINVAL;
+
+	/* Strip MEMFD_PREFIX to retain compatibility with ashmem driver. */
+	file_name = &file_name[MEMFD_PREFIX_LEN];
 
 	/*
 	 * The expectation is that the user provided buffer is ASHMEM_NAME_LEN in size, which is

From ca2f65da73b1771ac53c892a8eb3ac95c374d4f1 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 3 Apr 2025 14:16:31 -0700
Subject: [PATCH 06/44] UPSTREAM: codel: remove sch->q.qlen check before
 qdisc_tree_reduce_backlog()

[ Upstream commit 342debc12183b51773b3345ba267e9263bdfaaef ]

Now that all ->qlen_notify() callbacks are idempotent, it is safe to
remove the qlen != 0 check from both fq_codel_dequeue() and
codel_qdisc_dequeue().

Bug: 410432097
Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM")
Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250403211636.166257-1-xiyou.wangcong@gmail.com
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit 4d55144b12e742404bb3f8fee6038bafbf45619d)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: I9508beb45995f003612895517ea391ef4beee7b4
---
 net/sched/sch_codel.c    | 5 +----
 net/sched/sch_fq_codel.c | 6 ++----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index d7a4874543de..5f2e06815745 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -95,10 +95,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
 			    &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
 			    drop_func, dequeue_func);
 
-	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
-	 * or HTB crashes. Defer it for next round.
-	 */
-	if (q->stats.drop_count && sch->q.qlen) {
+	if (q->stats.drop_count) {
 		qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
 		q->stats.drop_count = 0;
 		q->stats.drop_len = 0;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 8c4fee063436..9330923a624c 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -314,10 +314,8 @@ begin:
 	}
 	qdisc_bstats_update(sch, skb);
 	flow->deficit -= qdisc_pkt_len(skb);
-	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
-	 * or HTB crashes. Defer it for next round.
-	 */
-	if (q->cstats.drop_count && sch->q.qlen) {
+
+	if (q->cstats.drop_count) {
 		qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
 					  q->cstats.drop_len);
 		q->cstats.drop_count = 0;

From 48ab183a3ee2b787b0e0386200b350a58f204b3d Mon Sep 17 00:00:00 2001
From: Pierre Couillaud <pierre@broadcom.com>
Date: Tue, 6 May 2025 14:05:27 -0700
Subject: [PATCH 07/44] ANDROID: GKI: Update symbol list for bcmstb

INFO: 1 function symbol(s) added
  'int __hwspin_trylock(struct hwspinlock*, int, unsigned long*)'

Bug: 416077180
Change-Id: I7186f775675b14a2323eb62eabb7a05e485b7464
Signed-off-by: Pierre Couillaud <pierre@broadcom.com>
---
 android/abi_gki_aarch64.stg    | 17 +++++++++++++++++
 android/abi_gki_aarch64_bcmstb |  1 +
 2 files changed, 18 insertions(+)

diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg
index 1ec9533fbf5c..fdead9fe8009 100644
--- a/android/abi_gki_aarch64.stg
+++ b/android/abi_gki_aarch64.stg
@@ -335580,6 +335580,13 @@ function {
   parameter_id: 0x33b77109
   parameter_id: 0x3283ded6
 }
+function {
+  id: 0x9eb38da8
+  return_type_id: 0x6720d32f
+  parameter_id: 0x0ab9fa4c
+  parameter_id: 0x6720d32f
+  parameter_id: 0x064d6086
+}
 function {
   id: 0x9eb3dea3
   return_type_id: 0x6720d32f
@@ -344383,6 +344390,15 @@ elf_symbol {
   type_id: 0x9e215925
   full_name: "__hwspin_lock_timeout"
 }
+elf_symbol {
+  id: 0xac885058
+  name: "__hwspin_trylock"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0xe4d4ab7e
+  type_id: 0x9eb38da8
+  full_name: "__hwspin_trylock"
+}
 elf_symbol {
   id: 0x09a111a4
   name: "__hwspin_unlock"
@@ -418736,6 +418752,7 @@ interface {
   symbol_id: 0x58de7795
   symbol_id: 0x7874d435
   symbol_id: 0xc1db2428
+  symbol_id: 0xac885058
   symbol_id: 0x09a111a4
   symbol_id: 0x9ff710d8
   symbol_id: 0xee9e2392
diff --git a/android/abi_gki_aarch64_bcmstb b/android/abi_gki_aarch64_bcmstb
index e41670aeff26..446862098288 100644
--- a/android/abi_gki_aarch64_bcmstb
+++ b/android/abi_gki_aarch64_bcmstb
@@ -1604,6 +1604,7 @@
   sdhci_set_clock
   sdhci_set_uhs_signaling
   sdhci_setup_host
+  __hwspin_trylock
 
 # required by slcan.ko
   hex_asc_upper

From b29cc3971e2657571e64ba9fac9e95c64ac2c600 Mon Sep 17 00:00:00 2001
From: Krishna Kurapati <quic_kriskura@quicinc.com>
Date: Fri, 26 Apr 2024 10:35:12 +0530
Subject: [PATCH 08/44] UPSTREAM: usb: dwc3: core: Fix compile warning on s390
 gcc in dwc3_get_phy call

A recent commit introduced support for reading Multiport PHYs and, while
doing so, iterated over an integer variable which runs from [0-254] in
the worst case scenario. The S390 compiler warns about this, complaining
that writing the integer to the string can take up to 11 characters.
Fix this by changing the iterator variable to u8.

Bug: 254441685
Suggested-by: Johan Hovold <johan@kernel.org>
Fixes: 30a46746ca5a ("usb: dwc3: core: Refactor PHY logic to support Multiport Controller")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202404241215.Mib19Cu7-lkp@intel.com/
Signed-off-by: Krishna Kurapati <quic_kriskura@quicinc.com>
Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20240426050512.57384-1-quic_kriskura@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 3f12222a4bebeb13ce06ddecc1610ad32fa835dd)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: If11b5b866842a9e94edfdfcaa7c4aea1f575e0d2
---
 drivers/usb/dwc3/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index b757fe1e5d96..1ec8522fc93d 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1480,7 +1480,7 @@ static int dwc3_core_get_phy(struct dwc3 *dwc)
 	struct phy		*temp_phy = NULL;
 	char phy_name[9];
 	int ret;
-	int i;
+	u8 i;
 
 	if (node) {
 		dwc->usb2_phy = devm_usb_get_phy_by_phandle(dev, "usb-phy", 0);
@@ -1510,7 +1510,7 @@ static int dwc3_core_get_phy(struct dwc3 *dwc)
 		if (vdwc->num_usb2_ports == 1)
 			snprintf(phy_name, sizeof(phy_name), "usb2-phy");
 		else
-			snprintf(phy_name, sizeof(phy_name),  "usb2-%d", i);
+			snprintf(phy_name, sizeof(phy_name),  "usb2-%u", i);
 
 		temp_phy = devm_phy_get(dev, phy_name);
 		if (IS_ERR(temp_phy)) {
@@ -1532,7 +1532,7 @@ static int dwc3_core_get_phy(struct dwc3 *dwc)
 		if (vdwc->num_usb3_ports == 1)
 			snprintf(phy_name, sizeof(phy_name), "usb3-phy");
 		else
-			snprintf(phy_name, sizeof(phy_name), "usb3-%d", i);
+			snprintf(phy_name, sizeof(phy_name), "usb3-%u", i);
 
 		temp_phy = devm_phy_get(dev, phy_name);
 		if (IS_ERR(temp_phy)) {

From b45e2c927411cd2e48dd5eae01165f2fe01ee27e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 2 Oct 2024 14:22:23 +0200
Subject: [PATCH 09/44] UPSTREAM: PM: domains: Fix alloc/free in
 dev_pm_domain_attach|detach_list()

The dev_pm_domain_attach|detach_list() functions are not resource managed,
hence they should not use devm_* helpers to manage allocation/freeing of
data. Let's fix this by converting to the traditional alloc/free functions.

Bug: 254441685
Fixes: 161e16a5e50a ("PM: domains: Add helper functions to attach/detach multiple PM domains")
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/20241002122232.194245-3-ulf.hansson@linaro.org
(cherry picked from commit 7738568885f2eaecfc10a3f530a2693e5f0ae3d0)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: If7138b246fcd6811001ba7b22c118b2e5132c463
---
 drivers/base/power/common.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 299496e1381d..d853c75dda43 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -195,6 +195,7 @@ int dev_pm_domain_attach_list(struct device *dev,
 	struct device *pd_dev = NULL;
 	int ret, i, num_pds = 0;
 	bool by_id = true;
+	size_t size;
 	u32 pd_flags = data ? data->pd_flags : 0;
 	u32 link_flags = pd_flags & PD_FLAG_NO_DEV_LINK ? 0 :
 			DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME;
@@ -217,19 +218,17 @@ int dev_pm_domain_attach_list(struct device *dev,
 	if (num_pds <= 0)
 		return 0;
 
-	pds = devm_kzalloc(dev, sizeof(*pds), GFP_KERNEL);
+	pds = kzalloc(sizeof(*pds), GFP_KERNEL);
 	if (!pds)
 		return -ENOMEM;
 
-	pds->pd_devs = devm_kcalloc(dev, num_pds, sizeof(*pds->pd_devs),
-				    GFP_KERNEL);
-	if (!pds->pd_devs)
-		return -ENOMEM;
-
-	pds->pd_links = devm_kcalloc(dev, num_pds, sizeof(*pds->pd_links),
-				     GFP_KERNEL);
-	if (!pds->pd_links)
-		return -ENOMEM;
+	size = sizeof(*pds->pd_devs) + sizeof(*pds->pd_links);
+	pds->pd_devs = kcalloc(num_pds, size, GFP_KERNEL);
+	if (!pds->pd_devs) {
+		ret = -ENOMEM;
+		goto free_pds;
+	}
+	pds->pd_links = (void *)(pds->pd_devs + num_pds);
 
 	if (link_flags && pd_flags & PD_FLAG_DEV_LINK_ON)
 		link_flags |= DL_FLAG_RPM_ACTIVE;
@@ -272,6 +271,9 @@ err_attach:
 			device_link_del(pds->pd_links[i]);
 		dev_pm_domain_detach(pds->pd_devs[i], true);
 	}
+	kfree(pds->pd_devs);
+free_pds:
+	kfree(pds);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_domain_attach_list);
@@ -318,6 +320,9 @@ void dev_pm_domain_detach_list(struct dev_pm_domain_list *list)
 			device_link_del(list->pd_links[i]);
 		dev_pm_domain_detach(list->pd_devs[i], true);
 	}
+
+	kfree(list->pd_devs);
+	kfree(list);
 }
 EXPORT_SYMBOL_GPL(dev_pm_domain_detach_list);
 

From 7192539e3e36af08ac9d5159c3cb81bfe7f3af8b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 9 Oct 2024 16:51:03 +0200
Subject: [PATCH 10/44] UPSTREAM: serial: qcom-geni: revert broken hibernation
 support

This reverts commit 35781d8356a2eecaa6074ceeb80ee22e252fcdae.

Hibernation is not supported on Qualcomm platforms with mainline
kernels yet a broken vendor implementation for the GENI serial driver
made it upstream.

This is effectively dead code that cannot be tested and should just be
removed, but if these paths were ever hit for an open non-console port
they would crash the machine as the driver would fail to enable clocks
during restore() (i.e. all ports would have to be closed by drivers and
user space before hibernating the system to avoid this as a comment in
the code hinted at).

The broken implementation also added a random call to enable the
receiver in the port setup code where it does not belong and which
enables the receiver prematurely for console ports.

Bug: 254441685
Fixes: 35781d8356a2 ("tty: serial: qcom-geni-serial: Add support for Hibernation feature")
Cc: stable@vger.kernel.org	# 6.2
Cc: Aniket Randive <quic_arandive@quicinc.com>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20241009145110.16847-3-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 19df76662a33d2f2fc41a66607cb8285fc02d6ec)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: I2ee5832b26e10ff03699e74a8f72d1c0393c9e22
---
 drivers/tty/serial/qcom_geni_serial.c | 41 ++-------------------------
 1 file changed, 2 insertions(+), 39 deletions(-)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index ae82136f0aeb..76ceff99ef5e 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1118,7 +1118,6 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
 			       false, true, true);
 	geni_se_init(&port->se, UART_RX_WM, port->rx_fifo_depth - 2);
 	geni_se_select_mode(&port->se, port->dev_data->mode);
-	qcom_geni_serial_start_rx(uport);
 	port->setup = true;
 
 	return 0;
@@ -1734,38 +1733,6 @@ static int qcom_geni_serial_sys_resume(struct device *dev)
 	return ret;
 }
 
-static int qcom_geni_serial_sys_hib_resume(struct device *dev)
-{
-	int ret = 0;
-	struct uart_port *uport;
-	struct qcom_geni_private_data *private_data;
-	struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
-
-	uport = &port->uport;
-	private_data = uport->private_data;
-
-	if (uart_console(uport)) {
-		geni_icc_set_tag(&port->se, QCOM_ICC_TAG_ALWAYS);
-		geni_icc_set_bw(&port->se);
-		ret = uart_resume_port(private_data->drv, uport);
-		/*
-		 * For hibernation usecase clients for
-		 * console UART won't call port setup during restore,
-		 * hence call port setup for console uart.
-		 */
-		qcom_geni_serial_port_setup(uport);
-	} else {
-		/*
-		 * Peripheral register settings are lost during hibernation.
-		 * Update setup flag such that port setup happens again
-		 * during next session. Clients of HS-UART will close and
-		 * open the port during hibernation.
-		 */
-		port->setup = false;
-	}
-	return ret;
-}
-
 static const struct qcom_geni_device_data qcom_geni_console_data = {
 	.console = true,
 	.mode = GENI_SE_FIFO,
@@ -1777,12 +1744,8 @@ static const struct qcom_geni_device_data qcom_geni_uart_data = {
 };
 
 static const struct dev_pm_ops qcom_geni_serial_pm_ops = {
-	.suspend = pm_sleep_ptr(qcom_geni_serial_sys_suspend),
-	.resume = pm_sleep_ptr(qcom_geni_serial_sys_resume),
-	.freeze = pm_sleep_ptr(qcom_geni_serial_sys_suspend),
-	.poweroff = pm_sleep_ptr(qcom_geni_serial_sys_suspend),
-	.restore = pm_sleep_ptr(qcom_geni_serial_sys_hib_resume),
-	.thaw = pm_sleep_ptr(qcom_geni_serial_sys_hib_resume),
+	SYSTEM_SLEEP_PM_OPS(qcom_geni_serial_sys_suspend,
+					qcom_geni_serial_sys_resume)
 };
 
 static const struct of_device_id qcom_geni_serial_match_table[] = {

From a368123b90e52869829cd2fffec370c5224bf5d0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 9 Oct 2024 16:51:05 +0200
Subject: [PATCH 11/44] UPSTREAM: serial: qcom-geni: fix dma rx cancellation

Make sure to wait for the DMA transfer to complete when cancelling the
rx command on stop_rx(). This specifically prevents the DMA completion
interrupt from firing after rx has been restarted, something which can
lead to an IOMMU fault and hosed rx when the interrupt handler unmaps
the DMA buffer for the new command:

	qcom_geni_serial 988000.serial: serial engine reports 0 RX bytes in!
	arm-smmu 15000000.iommu: FSR    = 00000402 [Format=2 TF], SID=0x563
	arm-smmu 15000000.iommu: FSYNR0 = 00210013 [S1CBNDX=33 WNR PLVL=3]
	Bluetooth: hci0: command 0xfc00 tx timeout
	Bluetooth: hci0: Reading QCA version information failed (-110)

Also add the missing state machine reset which is needed in case
cancellation fails.

Bug: 254441685
Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA")
Cc: stable@vger.kernel.org      # 6.3
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20241009145110.16847-5-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 23ee4a25661c33e6381d41e848a9060ed6d72845)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Ie7e9dd51669db7f90057c2535ee8b51814ea7e93
---
 drivers/tty/serial/qcom_geni_serial.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 76ceff99ef5e..31e938effbb7 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -764,17 +764,27 @@ static void qcom_geni_serial_start_rx_fifo(struct uart_port *uport)
 static void qcom_geni_serial_stop_rx_dma(struct uart_port *uport)
 {
 	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	bool done;
 
 	if (!qcom_geni_serial_secondary_active(uport))
 		return;
 
 	geni_se_cancel_s_cmd(&port->se);
-	qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
-				  S_CMD_CANCEL_EN, true);
-
-	if (qcom_geni_serial_secondary_active(uport))
+	done = qcom_geni_serial_poll_bit(uport, SE_DMA_RX_IRQ_STAT,
+			RX_EOT, true);
+	if (done) {
+		writel(RX_EOT | RX_DMA_DONE,
+				uport->membase + SE_DMA_RX_IRQ_CLR);
+	} else {
 		qcom_geni_serial_abort_rx(uport);
 
+		writel(1, uport->membase + SE_DMA_RX_FSM_RST);
+		qcom_geni_serial_poll_bit(uport, SE_DMA_RX_IRQ_STAT,
+				RX_RESET_DONE, true);
+		writel(RX_RESET_DONE | RX_DMA_DONE,
+				uport->membase + SE_DMA_RX_IRQ_CLR);
+	}
+
 	if (port->rx_dma_addr) {
 		geni_se_rx_dma_unprep(&port->se, port->rx_dma_addr,
 				      DMA_RX_BUF_SIZE);

From 98cb57aeb332147a0b77c392d806b4c3b16ca1c0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 9 Oct 2024 16:51:06 +0200
Subject: [PATCH 12/44] UPSTREAM: serial: qcom-geni: fix receiver enable

The receiver is supposed to be enabled in the startup() callback and not
in set_termios() which is called also during console setup.

This specifically avoids accepting input before the port has been opened
(and interrupts enabled), something which can also break the GENI
firmware (cancel fails and after abort, the "stale" counter handling
appears to be broken so that later input is not processed until twelve
chars have been received).

There also does not appear to be any need to keep the receiver disabled
while updating the port settings.

Since commit 6f3c3cafb115 ("serial: qcom-geni: disable interrupts during
console writes") the calls to manipulate the secondary interrupts, which
were done without holding the port lock, can also lead to the receiver
being left disabled when set_termios() races with the console code (e.g.
when init opens the tty during boot). This can manifest itself as a
serial getty not accepting input.

The calls to stop and start rx in set_termios() can similarly race with
DMA completion and, for example, cause the DMA buffer to be unmapped
twice or the mapping to be leaked.

Fix this by only enabling the receiver during startup and while holding
the port lock to avoid racing with the console code.

Bug: 254441685
Fixes: 6f3c3cafb115 ("serial: qcom-geni: disable interrupts during console writes")
Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA")
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Cc: stable@vger.kernel.org      # 6.3
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20241009145110.16847-6-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit fa103d2599e11e802c818684cff821baefe7f206)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Ie5771faa0adbf570c9f726031cb973d013e04cca
---
 drivers/tty/serial/qcom_geni_serial.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 31e938effbb7..dda017e40cb8 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1143,6 +1143,11 @@ static int qcom_geni_serial_startup(struct uart_port *uport)
 		if (ret)
 			return ret;
 	}
+
+	uart_port_lock_irq(uport);
+	qcom_geni_serial_start_rx(uport);
+	uart_port_unlock_irq(uport);
+
 	enable_irq(uport->irq);
 
 	return 0;
@@ -1227,7 +1232,6 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
 	u32 ver, sampling_rate;
 	unsigned int avg_bw_core;
 
-	qcom_geni_serial_stop_rx(uport);
 	/* baud rate */
 	baud = uart_get_baud_rate(uport, termios, old, 300, 4000000);
 	port->baud = baud;
@@ -1244,7 +1248,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
 		dev_err(port->se.dev,
 			"Couldn't find suitable clock rate for %u\n",
 			baud * sampling_rate);
-		goto out_restart_rx;
+		return;
 	}
 
 	dev_dbg(port->se.dev, "desired_rate = %u, clk_rate = %lu, clk_div = %u\n",
@@ -1323,8 +1327,6 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
 	writel(stop_bit_len, uport->membase + SE_UART_TX_STOP_BIT_LEN);
 	writel(ser_clk_cfg, uport->membase + GENI_SER_M_CLK_CFG);
 	writel(ser_clk_cfg, uport->membase + GENI_SER_S_CLK_CFG);
-out_restart_rx:
-	qcom_geni_serial_start_rx(uport);
 }
 
 #ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE

From 34f1eb99850e7df61a91c58d2afd2a18a5cf91ad Mon Sep 17 00:00:00 2001
From: Wei Xu <weixugc@google.com>
Date: Mon, 14 Oct 2024 22:12:11 +0000
Subject: [PATCH 13/44] UPSTREAM: mm/mglru: only clear kswapd_failures if
 reclaimable

lru_gen_shrink_node() unconditionally clears kswapd_failures, which can
prevent kswapd from sleeping and cause 100% kswapd cpu usage even when
kswapd repeatedly fails to make progress in reclaim.

Only clear kswapd_failures in lru_gen_shrink_node() if reclaim makes some
progress, similar to shrink_node().

I happened to run into this problem in one of my tests recently.  It
requires a combination of several conditions: the allocator needs to
allocate just the right number of pages such that it can wake up kswapd
without itself being OOM killed; there is no memory for kswapd to
reclaim (my test disables swap and cleans the page cache first); and no
other process frees enough memory at the same time.

Bug: 254441685
Link: https://lkml.kernel.org/r/20241014221211.832591-1-weixugc@google.com
Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
Signed-off-by: Wei Xu <weixugc@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Jan Alexander Steffens <heftig@archlinux.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit b130ba4a6259f6b64d8af15e9e7ab1e912bcb7ad)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Ia2b4a0d71096d1e6cd0ee6054df3544724d4b665
---
 mm/vmscan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index c14a16044515..08e98c9f0a90 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5654,8 +5654,8 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
 
 	blk_finish_plug(&plug);
 done:
-	/* kswapd should never fail */
-	pgdat->kswapd_failures = 0;
+	if (sc->nr_reclaimed > reclaimed)
+		pgdat->kswapd_failures = 0;
 }
 
 /******************************************************************************

From 7c2011337f5bec67497f0bb527b9eceb9acab19d Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Thu, 17 Oct 2024 00:19:47 +0000
Subject: [PATCH 14/44] UPSTREAM: KVM: arm64: Ensure vgic_ready() is ordered
 against MMIO registration

kvm_vgic_map_resources() prematurely marks the distributor as 'ready',
potentially allowing vCPUs to enter the guest before the distributor's
MMIO registration has been made visible.

Plug the race by marking the distributor as ready only after MMIO
registration is completed. Rely on the implied ordering of
synchronize_srcu() to ensure the MMIO registration is visible before
vgic_dist::ready. This also means that writers to vgic_dist::ready are
now serialized by the slots_lock, which was effectively the case already
as all writers held the slots_lock in addition to the config_lock.

Bug: 254441685
Fixes: 59112e9c390b ("KVM: arm64: vgic: Fix a circular locking issue")
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20241017001947.2707312-3-oliver.upton@linux.dev
Signed-off-by: Marc Zyngier <maz@kernel.org>
(cherry picked from commit 78a00555550042ed77b33ace7423aced228b3b4e)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: I01a7bdc92bbfe8642829c0c8f5e1bb55e1aea18f
---
 arch/arm64/kvm/vgic/vgic-init.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 059d00c17d26..0c976b3dd2b4 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -482,14 +482,23 @@ int kvm_vgic_map_resources(struct kvm *kvm)
 	if (ret)
 		goto out;
 
-	dist->ready = true;
 	dist_base = dist->vgic_dist_base;
 	mutex_unlock(&kvm->arch.config_lock);
 
 	ret = vgic_register_dist_iodev(kvm, dist_base, type);
-	if (ret)
+	if (ret) {
 		kvm_err("Unable to register VGIC dist MMIO regions\n");
+		goto out_slots;
+	}
 
+	/*
+	 * kvm_io_bus_register_dev() guarantees all readers see the new MMIO
+	 * registration before returning through synchronize_srcu(), which also
+	 * implies a full memory barrier. As such, marking the distributor as
+	 * 'ready' here is guaranteed to be ordered after all vCPUs having seen
+	 * a completely configured distributor.
+	 */
+	dist->ready = true;
 	goto out_slots;
 out:
 	mutex_unlock(&kvm->arch.config_lock);

From c5abfe08fc657a2511592ee7c6a684b848af2def Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Tue, 15 Oct 2024 18:56:05 +0100
Subject: [PATCH 15/44] UPSTREAM: fork: do not invoke uffd on fork if error
 occurs

Patch series "fork: do not expose incomplete mm on fork".

During fork we may place the virtual memory address space into an
inconsistent state before the fork operation is complete.

In addition, we may encounter an error during the fork operation that
indicates that the virtual memory address space is invalidated.

As a result, we should not be exposing it in any way to external machinery
that might interact with the mm or VMAs, machinery that is not designed to
deal with incomplete state.

We specifically update the fork logic to defer khugepaged and ksm to the
end of the operation and only to be invoked if no error arose, and
disallow uffd from observing fork events should an error have occurred.

This patch (of 2):

Currently on fork we expose the virtual address space of a process to
userland unconditionally if uffd is registered in VMAs, regardless of
whether an error arose in the fork.

This is performed in dup_userfaultfd_complete() which is invoked
unconditionally, and performs two duties - invoking registered handlers
for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down
userfaultfd_fork_ctx objects established in dup_userfaultfd().

This is problematic, because the virtual address space may not yet be
correctly initialised if an error arose.

The change in commit d24062914837 ("fork: use __mt_dup() to duplicate
maple tree in dup_mmap()") makes this more pertinent as we may be in a
state where entries in the maple tree are not yet consistent.

We address this by, on fork error, ensuring that we roll back state that
we would otherwise expect to clean up through the event being handled by
userland and perform the memory freeing duty otherwise performed by
dup_userfaultfd_complete().

We do this by implementing a new function, dup_userfaultfd_fail(), which
performs the same loop, only decrementing reference counts.

Note that we perform mmgrab() on the parent and child mm's, however
userfaultfd_ctx_put() will mmdrop() this once the reference count drops to
zero, so we will avoid memory leaks correctly here.

Bug: 254441685
Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com
Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reported-by: Jann Horn <jannh@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Linus Torvalds <torvalds@linuxfoundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit f64e67e5d3a45a4a04286c47afade4b518acd47b)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: I9c2f774a0f4a0a75729b86c77c627fb38b8bb17b
---
 fs/userfaultfd.c              | 28 ++++++++++++++++++++++++++++
 include/linux/userfaultfd_k.h |  5 +++++
 kernel/fork.c                 |  5 ++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f36e6e018b26..e8baf3e8a86a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -692,6 +692,34 @@ void dup_userfaultfd_complete(struct list_head *fcs)
 	}
 }
 
+void dup_userfaultfd_fail(struct list_head *fcs)
+{
+	struct userfaultfd_fork_ctx *fctx, *n;
+
+	/*
+	 * An error has occurred on fork, we will tear memory down, but have
+	 * allocated memory for fctx's and raised reference counts for both the
+	 * original and child contexts (and on the mm for each as a result).
+	 *
+	 * These would ordinarily be taken care of by a user handling the event,
+	 * but we are no longer doing so, so manually clean up here.
+	 *
+	 * mm tear down will take care of cleaning up VMA contexts.
+	 */
+	list_for_each_entry_safe(fctx, n, fcs, list) {
+		struct userfaultfd_ctx *octx = fctx->orig;
+		struct userfaultfd_ctx *ctx = fctx->new;
+
+		atomic_dec(&octx->mmap_changing);
+		VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);
+		userfaultfd_ctx_put(octx);
+		userfaultfd_ctx_put(ctx);
+
+		list_del(&fctx->list);
+		kfree(fctx);
+	}
+}
+
 void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 			     struct vm_userfaultfd_ctx *vm_ctx)
 {
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 7d881c5df6ad..587f718ccc90 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -225,6 +225,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
 
 extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
 extern void dup_userfaultfd_complete(struct list_head *);
+void dup_userfaultfd_fail(struct list_head *);
 
 extern void mremap_userfaultfd_prep(struct vm_area_struct *,
 				    struct vm_userfaultfd_ctx *);
@@ -299,6 +300,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
 {
 }
 
+static inline void dup_userfaultfd_fail(struct list_head *l)
+{
+}
+
 static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 					   struct vm_userfaultfd_ctx *ctx)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 2258ff82172e..82cd80f04f9d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -806,7 +806,10 @@ out:
 	mmap_write_unlock(mm);
 	flush_tlb_mm(oldmm);
 	mmap_write_unlock(oldmm);
-	dup_userfaultfd_complete(&uf);
+	if (!retval)
+		dup_userfaultfd_complete(&uf);
+	else
+		dup_userfaultfd_fail(&uf);
 fail_uprobe_end:
 	uprobe_end_dup_mmap();
 	return retval;

From 1cc1e931722aa31c670505d112197cbca3d02831 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Tue, 10 Dec 2024 17:24:12 +0000
Subject: [PATCH 16/44] UPSTREAM: fork: avoid inappropriate uprobe access to
 invalid mm

If dup_mmap() encounters an issue, currently uprobe is able to access the
relevant mm via the reverse mapping (in build_map_info()), and if we are
very unlucky with a race window, observe invalid XA_ZERO_ENTRY state which
we establish as part of the fork error path.

This occurs because uprobe_write_opcode() invokes anon_vma_prepare() which
in turn invokes find_mergeable_anon_vma() that uses a VMA iterator,
invoking vma_iter_load() which uses the advanced maple tree API and thus
is able to observe XA_ZERO_ENTRY entries added to dup_mmap() in commit
d24062914837 ("fork: use __mt_dup() to duplicate maple tree in
dup_mmap()").

This change was made on the assumption that only process tear-down code
would actually observe (and make use of) these values.  However this very
unlikely but still possible edge case with uprobes exists and
unfortunately does make these observable.

The uprobe operation prevents races against the dup_mmap() operation via
the dup_mmap_sem semaphore, which is acquired via uprobe_start_dup_mmap()
and dropped via uprobe_end_dup_mmap(), and held across
register_for_each_vma() prior to invoking build_map_info() which does the
reverse mapping lookup.

Currently these are acquired and dropped within dup_mmap(), which exposes
the race window prior to error handling in the invoking dup_mm() which
tears down the mm.

We can avoid all this by just moving the invocation of
uprobe_start_dup_mmap() and uprobe_end_dup_mmap() up a level to dup_mm()
and only release this lock once the dup_mmap() operation succeeds or clean
up is done.

This means that the uprobe code can never observe an incompletely
constructed mm and resolves the issue in this case.

Bug: 254441685
Link: https://lkml.kernel.org/r/20241210172412.52995-1-lorenzo.stoakes@oracle.com
Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reported-by: syzbot+2d788f4f7cb660dac4b7@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6756d273.050a0220.2477f.003d.GAE@google.com/
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 8ac662f5da19f5873fdd94c48a5cdb45b2e1b58f)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: I915ed6b4f49d63d0d629dd8e9247d4684c664f3a
---
 kernel/fork.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 82cd80f04f9d..91357988f282 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -662,11 +662,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	LIST_HEAD(uf);
 	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
-	uprobe_start_dup_mmap();
-	if (mmap_write_lock_killable(oldmm)) {
-		retval = -EINTR;
-		goto fail_uprobe_end;
-	}
+	if (mmap_write_lock_killable(oldmm))
+		return -EINTR;
 	flush_cache_dup_mm(oldmm);
 	uprobe_dup_mmap(oldmm, mm);
 	/*
@@ -810,8 +807,6 @@ out:
 		dup_userfaultfd_complete(&uf);
 	else
 		dup_userfaultfd_fail(&uf);
-fail_uprobe_end:
-	uprobe_end_dup_mmap();
 	return retval;
 
 fail_nomem_anon_vma_fork:
@@ -1642,9 +1637,11 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
 	if (!mm_init(mm, tsk, mm->user_ns))
 		goto fail_nomem;
 
+	uprobe_start_dup_mmap();
 	err = dup_mmap(mm, oldmm);
 	if (err)
 		goto free_pt;
+	uprobe_end_dup_mmap();
 
 	mm->hiwater_rss = get_mm_rss(mm);
 	mm->hiwater_vm = mm->total_vm;
@@ -1659,6 +1656,8 @@ free_pt:
 	mm->binfmt = NULL;
 	mm_init_owner(mm, NULL);
 	mmput(mm);
+	if (err)
+		uprobe_end_dup_mmap();
 
 fail_nomem:
 	return NULL;

From 646380b087a5203408877933afdaa2b480c2bc57 Mon Sep 17 00:00:00 2001
From: Selvarasu Ganesan <selvarasu.g@samsung.com>
Date: Sat, 18 Jan 2025 11:31:33 +0530
Subject: [PATCH 17/44] UPSTREAM: usb: gadget: f_midi: Fixing wMaxPacketSize
 exceeded issue during MIDI bind retries

The current implementation sets the wMaxPacketSize of the bulk in/out
endpoints to 1024 bytes at the end of the f_midi_bind() function. However,
if the first MIDI bind attempt fails, the function may be bound again.
That second f_midi_bind() call may hit an issue because the previous bind
left the bulk endpoints' wMaxPacketSize at 1024 bytes, which exceeds
ep->maxpacket_limit when the dwc3 TX/RX FIFOs are configured with a
maxpacket size of 512 bytes for the IN/OUT endpoints to support HS speed
only.

In this context, "rebind" refers to attempting to bind the MIDI function
a second time. The situations in which a rebind is attempted include:

 * When the first UDC write attempt fails, which may be caused by other
   functions being bound along with MIDI.
 * Runtime composition change, for example MIDI,ADB to MIDI, or MIDI to
   MIDI,ADB.

This commit addresses the issue by resetting wMaxPacketSize before the
endpoints are claimed. There is no need to reset all values in the USB
endpoint descriptor structure, as all members except wMaxPacketSize and
bEndpointAddress have predefined values.

This restores the endpoint to its expected configuration and prevents
conflicts with the value of ep->maxpacket_limit. It also aligns with the
approach used in other function drivers, which treat endpoint descriptors
as if they were full speed before endpoint claim.

Bug: 254441685
Fixes: 46decc82ffd5 ("usb: gadget: unconditionally allocate hs/ss descriptor in bind operation")
Cc: stable@vger.kernel.org
Signed-off-by: Selvarasu Ganesan <selvarasu.g@samsung.com>
Link: https://lore.kernel.org/r/20250118060134.927-1-selvarasu.g@samsung.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 9e8b21410f310c50733f6e1730bae5a8e30d3570)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: I300e3f5aa42555faf1e3c97b716396a6f8c77770
---
 drivers/usb/gadget/function/f_midi.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 9c8d56a496c9..5f022db4fa71 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -906,6 +906,15 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f)
 
 	status = -ENODEV;
 
+	/*
+	 * Reset wMaxPacketSize with maximum packet size of FS bulk transfer before
+	 * endpoint claim. This ensures that the wMaxPacketSize does not exceed the
+	 * limit during bind retries where configured dwc3 TX/RX FIFO's maxpacket
+	 * size of 512 bytes for IN/OUT endpoints in support HS speed only.
+	 */
+	bulk_in_desc.wMaxPacketSize = cpu_to_le16(64);
+	bulk_out_desc.wMaxPacketSize = cpu_to_le16(64);
+
 	/* allocate instance-specific endpoints */
 	midi->in_ep = usb_ep_autoconfig(cdev->gadget, &bulk_in_desc);
 	if (!midi->in_ep)

From 53b26534cce792bd8a1040c0dcddd31a34194ab0 Mon Sep 17 00:00:00 2001
From: Norihiko Hama <Norihiko.Hama@alpsalpine.com>
Date: Wed, 15 May 2024 09:43:39 +0900
Subject: [PATCH 18/44] UPSTREAM: usb-storage: Optimize scan delay more
 precisely

Current storage scan delay is reduced by the following old commit.

a4a47bc03fe5 ("Lower USB storage settling delay to something more reasonable")

This means that the delay is at least one second, or zero with delay_use=0.
One second is still a long delay, especially for embedded systems, but
when delay_use is set to 0 (no delay), errors are still observed on some
USB drives.

So delay_use should not be set to 0, but one second is quite long.
Especially on embedded systems, it matters to the end user how quickly
a USB drive becomes accessible after it is connected.
This gives us an opportunity to minimize such a constant, long delay.

This patch allows the scan delay to be tuned more precisely, so that the
delay can be minimized without causing problems on USB drives, by storing
the module parameter 'delay_use' in milliseconds internally.
The parameter 'delay_use' is optionally interpreted as milliseconds
if its value ends with 'ms'.
This reduces the range of values expressible in seconds to 1/1000 of the
internal 32-bit value, but that is still enough to set the delay time.
By default, the delay time is one second for backward compatibility.

For example, setting delay_use=100ms, i.e. a 100 millisecond delay,
seems to work without issues for most USB pen drives.

Bug: 408977963
Change-Id: I77521bc01a7dadaa5bb94aecd361f2507892928c
(cherry picked from commit 804da867ad016d53bf33373cfeaae041775455f1)
Signed-off-by: Norihiko Hama <Norihiko.Hama@alpsalpine.com>
Link: https://lore.kernel.org/r/20240515004339.29892-1-Norihiko.Hama@alpsalpine.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../admin-guide/kernel-parameters.txt         |   3 +
 drivers/usb/storage/usb.c                     | 101 +++++++++++++++++-
 2 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e969f2f14cd1..6593773a512d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6690,6 +6690,9 @@
 	usb-storage.delay_use=
 			[UMS] The delay in seconds before a new device is
 			scanned for Logical Units (default 1).
+			Optionally the delay in milliseconds if the value has
+			suffix with "ms".
+			Example: delay_use=2567ms
 
 	usb-storage.quirks=
 			[UMS] A list of quirks entries to supplement or
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index ed7c6ad96a74..dc4e3983f7c5 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -67,9 +67,102 @@ MODULE_AUTHOR("Matthew Dharm <mdharm-usb@one-eyed-alien.net>");
 MODULE_DESCRIPTION("USB Mass Storage driver for Linux");
 MODULE_LICENSE("GPL");
 
-static unsigned int delay_use = 1;
-module_param(delay_use, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device");
+static unsigned int delay_use = 1 * MSEC_PER_SEC;
+
+/**
+ * parse_delay_str - parse an unsigned decimal integer delay
+ * @str: String to parse.
+ * @ndecimals: Number of decimal to scale up.
+ * @suffix: Suffix string to parse.
+ * @val: Where to store the parsed value.
+ *
+ * Parse an unsigned decimal value in @str, optionally end with @suffix.
+ * Stores the parsed value in @val just as it is if @str ends with @suffix.
+ * Otherwise store the value scale up by 10^(@ndecimal).
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int parse_delay_str(const char *str, int ndecimals, const char *suffix,
+			unsigned int *val)
+{
+	int n, n2, l;
+	char buf[16];
+
+	l = strlen(suffix);
+	n = strlen(str);
+	if (n > 0 && str[n - 1] == '\n')
+		--n;
+	if (n >= l && !strncmp(&str[n - l], suffix, l)) {
+		n -= l;
+		n2 = 0;
+	} else
+		n2 = ndecimals;
+
+	if (n + n2 > sizeof(buf) - 1)
+		return -EINVAL;
+
+	memcpy(buf, str, n);
+	while (n2-- > 0)
+		buf[n++] = '0';
+	buf[n] = 0;
+
+	return kstrtouint(buf, 10, val);
+}
+
+/**
+ * format_delay_ms - format an integer value into a delay string
+ * @val: The integer value to format, scaled by 10^(@ndecimals).
+ * @ndecimals: Number of decimal to scale down.
+ * @suffix: Suffix string to format.
+ * @str: Where to store the formatted string.
+ * @size: The size of buffer for @str.
+ *
+ * Format an integer value in @val scale down by 10^(@ndecimals) without @suffix
+ * if @val is divisible by 10^(@ndecimals).
+ * Otherwise format a value in @val just as it is with @suffix
+ *
+ * Returns the number of characters written into @str.
+ */
+static int format_delay_ms(unsigned int val, int ndecimals, const char *suffix,
+			char *str, int size)
+{
+	u64 delay_ms = val;
+	unsigned int rem = do_div(delay_ms, int_pow(10, ndecimals));
+	int ret;
+
+	if (rem)
+		ret = scnprintf(str, size, "%u%s\n", val, suffix);
+	else
+		ret = scnprintf(str, size, "%u\n", (unsigned int)delay_ms);
+	return ret;
+}
+
+static int delay_use_set(const char *s, const struct kernel_param *kp)
+{
+	unsigned int delay_ms;
+	int ret;
+
+	ret = parse_delay_str(skip_spaces(s), 3, "ms", &delay_ms);
+	if (ret < 0)
+		return ret;
+
+	*((unsigned int *)kp->arg) = delay_ms;
+	return 0;
+}
+
+static int delay_use_get(char *s, const struct kernel_param *kp)
+{
+	unsigned int delay_ms = *((unsigned int *)kp->arg);
+
+	return format_delay_ms(delay_ms, 3, "ms", s, PAGE_SIZE);
+}
+
+static const struct kernel_param_ops delay_use_ops = {
+	.set = delay_use_set,
+	.get = delay_use_get,
+};
+module_param_cb(delay_use, &delay_use_ops, &delay_use, 0644);
+MODULE_PARM_DESC(delay_use, "time to delay before using a new device");
 
 static char quirks[128];
 module_param_string(quirks, quirks, sizeof(quirks), S_IRUGO | S_IWUSR);
@@ -1066,7 +1159,7 @@ int usb_stor_probe2(struct us_data *us)
 	if (delay_use > 0)
 		dev_dbg(dev, "waiting for device to settle before scanning\n");
 	queue_delayed_work(system_freezable_wq, &us->scan_dwork,
-			delay_use * HZ);
+			msecs_to_jiffies(delay_use));
 	return 0;
 
 	/* We come here if there are any problems */

From 3cd01bb5bd3786138b0613b342c8e7ea32d92851 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 16 Apr 2025 14:24:39 -0600
Subject: [PATCH 19/44] UPSTREAM: mm: Fix is_zero_page() usage in
 try_grab_page()

The backport of upstream commit c8070b787519 ("mm: Don't pin ZERO_PAGE
in pin_user_pages()") into v6.1.130 noted below in Fixes does not
account for commit 0f0892356fa1 ("mm: allow multiple error returns in
try_grab_page()"), which changed the return value of try_grab_page()
from bool to int.  Therefore returning 0, success in the upstream
version, becomes an error here.  Fix the return value.

Bug: 411256892
Fixes: 476c1dfefab8 ("mm: Don't pin ZERO_PAGE in pin_user_pages()")
Link: https://lore.kernel.org/all/Z_6uhLQjJ7SSzI13@eldamar.lan
Reported-by: Salvatore Bonaccorso <carnil@debian.org>
Reported-by: Milan Broz <gmazyland@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: stable@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: Sasha Levin <sashal@kernel.org>
Change-Id: I1a124e70161e48c9b3374aa3fc541c6a13ea1ff0
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit edde34b792edb58a65cf16971cf34b5619c0959a)
Signed-off-by: Will McVicker <willmcvicker@google.com>
---
 mm/gup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/gup.c b/mm/gup.c
index b1daaa9d89aa..76a2b0943e2d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -232,7 +232,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags)
 		 * and it is used in a *lot* of places.
 		 */
 		if (is_zero_page(page))
-			return 0;
+			return true;
 
 		/*
 		 * Similar to try_grab_folio(): be sure to *also*

From 218e2bd245875f4ad97f836cabf5970c044eee0f Mon Sep 17 00:00:00 2001
From: Yabin Cui <yabinc@google.com>
Date: Thu, 8 May 2025 16:26:42 -0700
Subject: [PATCH 20/44] FROMGIT: perf/aux: Allocate non-contiguous AUX pages by
 default

perf always allocates contiguous AUX pages based on aux_watermark.
However, this contiguous allocation doesn't benefit all PMUs. For
instance, ARM SPE and TRBE operate with virtual pages, and Coresight
ETR allocates a separate buffer. For these PMUs, allocating contiguous
AUX pages unnecessarily exacerbates memory fragmentation. This
fragmentation can prevent their use on long-running devices.

This patch modifies the perf driver to be memory-friendly by default,
by allocating non-contiguous AUX pages. For PMUs requiring contiguous
pages (Intel BTS and some Intel PT), the existing
PERF_PMU_CAP_AUX_NO_SG capability can be used. For PMUs that don't
require but can benefit from contiguous pages (some Intel PT), a new
capability, PERF_PMU_CAP_AUX_PREFER_LARGE, is added to maintain their
existing behavior.

Bug: 393467632
(cherry picked from commit 18049c8cff9cc89daadc4df6975f7d9069638926
 git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core)
Change-Id: Iaff554201726bf271c7625a6df59fb35c6cfbc5d
Signed-off-by: Yabin Cui <yabinc@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: James Clark <james.clark@linaro.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20250508232642.148767-1-yabinc@google.com
---
 arch/x86/events/intel/pt.c  |  2 ++
 include/linux/perf_event.h  |  1 +
 kernel/events/ring_buffer.c | 29 ++++++++++++++++++++---------
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 7ee8dc80a359..5ce05f559c64 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1793,6 +1793,8 @@ static __init int pt_init(void)
 
 	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
 		pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
+	else
+		pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE;
 
 	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
 	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 92d866352f35..6ef9152c8348 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -286,6 +286,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_EXCLUDE			0x0080
 #define PERF_PMU_CAP_AUX_OUTPUT			0x0100
 #define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0200
+#define PERF_PMU_CAP_AUX_PREFER_LARGE		0x0400
 
 struct perf_output_handle;
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 644dfed04926..b9c010a0e0fe 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -672,15 +672,23 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 {
 	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
 	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
-	int ret = -ENOMEM, max_order;
+	bool use_contiguous_pages = event->pmu->capabilities & (
+		PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_PREFER_LARGE);
+	/*
+	 * Initialize max_order to 0 for page allocation. This allocates single
+	 * pages to minimize memory fragmentation. This is overridden if the
+	 * PMU needs or prefers contiguous pages (use_contiguous_pages = true).
+	 */
+	int max_order = 0;
+	int ret = -ENOMEM;
 
 	if (!has_aux(event))
 		return -EOPNOTSUPP;
 
 	if (!overwrite) {
 		/*
-		 * Watermark defaults to half the buffer, and so does the
-		 * max_order, to aid PMU drivers in double buffering.
+		 * Watermark defaults to half the buffer, to aid PMU drivers
+		 * in double buffering.
 		 */
 		if (!watermark)
 			watermark = min_t(unsigned long,
@@ -688,16 +696,19 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 					  (unsigned long)nr_pages << (PAGE_SHIFT - 1));
 
 		/*
-		 * Use aux_watermark as the basis for chunking to
-		 * help PMU drivers honor the watermark.
+		 * If using contiguous pages, use aux_watermark as the basis
+		 * for chunking to help PMU drivers honor the watermark.
 		 */
-		max_order = get_order(watermark);
+		if (use_contiguous_pages)
+			max_order = get_order(watermark);
 	} else {
 		/*
-		 * We need to start with the max_order that fits in nr_pages,
-		 * not the other way around, hence ilog2() and not get_order.
+		 * If using contiguous pages, we need to start with the
+		 * max_order that fits in nr_pages, not the other way around,
+		 * hence ilog2() and not get_order.
 		 */
-		max_order = ilog2(nr_pages);
+		if (use_contiguous_pages)
+			max_order = ilog2(nr_pages);
 		watermark = 0;
 	}
 

From a0fa2316cce1bc23e788603d7a6d0a3db7b100d3 Mon Sep 17 00:00:00 2001
From: Yabin Cui <yabinc@google.com>
Date: Thu, 15 May 2025 14:40:02 -0700
Subject: [PATCH 21/44] ANDROID: ABI: Update pixel symbol list

Adding the following symbols:
  - irq_check_status_bit
  - irq_get_percpu_devid_partition
  - irq_work_run
  - perf_aux_output_skip
  - this_cpu_has_cap

Bug: 393467632
Change-Id: I8e9f34b6b40ec078586d175efb835a6898cbc4f1
Signed-off-by: Yabin Cui <yabinc@google.com>
---
 android/abi_gki_aarch64.stg   | 42 +++++++++++++++++++++++++++++++++++
 android/abi_gki_aarch64_pixel |  5 +++++
 2 files changed, 47 insertions(+)

diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg
index fdead9fe8009..97585255cc27 100644
--- a/android/abi_gki_aarch64.stg
+++ b/android/abi_gki_aarch64.stg
@@ -313053,6 +313053,12 @@ function {
   return_type_id: 0x3e10b518
   parameter_id: 0x6720d32f
 }
+function {
+  id: 0x8c02526b
+  return_type_id: 0x6720d32f
+  parameter_id: 0x4585663f
+  parameter_id: 0x38fa32ef
+}
 function {
   id: 0x8c19f874
   return_type_id: 0x6720d32f
@@ -337217,6 +337223,12 @@ function {
   parameter_id: 0x0d30b9c3
   parameter_id: 0x15a30023
 }
+function {
+  id: 0x9fe48d4f
+  return_type_id: 0x6720d32f
+  parameter_id: 0x0aa6efc8
+  parameter_id: 0x33756485
+}
 function {
   id: 0x9fe6297b
   return_type_id: 0x6720d32f
@@ -383195,6 +383207,15 @@ elf_symbol {
   type_id: 0x8d53ba62
   full_name: "irq_get_irqchip_state"
 }
+elf_symbol {
+  id: 0xe9124d83
+  name: "irq_get_percpu_devid_partition"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0x6cfe3b6f
+  type_id: 0x8c02526b
+  full_name: "irq_get_percpu_devid_partition"
+}
 elf_symbol {
   id: 0x28fabc56
   name: "irq_modify_status"
@@ -393203,6 +393224,15 @@ elf_symbol {
   type_id: 0x107966af
   full_name: "perf_aux_output_flag"
 }
+elf_symbol {
+  id: 0x4e76f69f
+  name: "perf_aux_output_skip"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0x5a8de386
+  type_id: 0x9fe48d4f
+  full_name: "perf_aux_output_skip"
+}
 elf_symbol {
   id: 0x84bf9f80
   name: "perf_event_addr_filters_sync"
@@ -408543,6 +408573,15 @@ elf_symbol {
   type_id: 0x95a406a6
   full_name: "thermal_zone_unbind_cooling_device"
 }
+elf_symbol {
+  id: 0xc20a8a50
+  name: "this_cpu_has_cap"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0x46e67a71
+  type_id: 0xefc5028b
+  full_name: "this_cpu_has_cap"
+}
 elf_symbol {
   id: 0x68e98442
   name: "thread_group_cputime_adjusted"
@@ -423062,6 +423101,7 @@ interface {
   symbol_id: 0x2ed6bfeb
   symbol_id: 0xa9c80d6c
   symbol_id: 0xb1d265b1
+  symbol_id: 0xe9124d83
   symbol_id: 0x28fabc56
   symbol_id: 0x68e07680
   symbol_id: 0xcd991820
@@ -424174,6 +424214,7 @@ interface {
   symbol_id: 0xf9e83d36
   symbol_id: 0x80bebca0
   symbol_id: 0xe207c73b
+  symbol_id: 0x4e76f69f
   symbol_id: 0x84bf9f80
   symbol_id: 0xea4938d9
   symbol_id: 0xdc02a166
@@ -425879,6 +425920,7 @@ interface {
   symbol_id: 0x66782435
   symbol_id: 0x793a755b
   symbol_id: 0x61ab1273
+  symbol_id: 0xc20a8a50
   symbol_id: 0x68e98442
   symbol_id: 0x1cf36c3c
   symbol_id: 0x9fc8421c
diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel
index 8c48602bccf2..5b8c7b54bde4 100644
--- a/android/abi_gki_aarch64_pixel
+++ b/android/abi_gki_aarch64_pixel
@@ -1198,6 +1198,7 @@
   __ipv6_addr_type
   __irq_alloc_descs
   __irq_apply_affinity_hint
+  irq_check_status_bit
   irq_create_mapping_affinity
   irq_create_of_mapping
   __irq_domain_add
@@ -1208,6 +1209,7 @@
   irq_domain_xlate_twocell
   irq_force_affinity
   irq_get_irq_data
+  irq_get_percpu_devid_partition
   irq_modify_status
   irq_of_parse_and_map
   __irq_resolve_mapping
@@ -1219,6 +1221,7 @@
   irq_set_irq_wake
   irq_to_desc
   irq_work_queue
+  irq_work_run
   irq_work_sync
   is_vmalloc_addr
   jiffies
@@ -1651,6 +1654,7 @@
   perf_aux_output_begin
   perf_aux_output_end
   perf_aux_output_flag
+  perf_aux_output_skip
   perf_event_addr_filters_sync
   perf_event_create_kernel_counter
   perf_event_disable
@@ -2337,6 +2341,7 @@
   thermal_zone_device_update
   thermal_zone_get_temp
   thermal_zone_get_zone_by_name
+  this_cpu_has_cap
   thread_group_cputime_adjusted
   tick_nohz_get_idle_calls_cpu
   time64_to_tm

From 3e7cb920f1676ddf15f979d5b32e111384f7b46c Mon Sep 17 00:00:00 2001
From: Octavian Purdila <tavip@google.com>
Date: Mon, 7 Apr 2025 13:24:07 -0700
Subject: [PATCH 22/44] UPSTREAM: net_sched: sch_sfq: use a temporary work area
 for validating configuration

[ Upstream commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 ]

Many configuration parameters have influence on others (e.g. divisor
-> flows -> limit, depth -> limit) and so it is difficult to correctly
do all of the validation before applying the configuration. And if a
validation error is detected late it is difficult to roll back a
partially applied configuration.

To avoid these issues use a temporary work area to update and validate
the configuration and only then apply the configuration to the
internal state.

Bug: 413623519
Signed-off-by: Octavian Purdila <tavip@google.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Stable-dep-of: b3bf8f63e617 ("net_sched: sch_sfq: move the limit validation")
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit 70449ca40609ec77f58b93ed154d54e1fdb197b6)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Icab9dc62eddd23f6a2c5d06dd1f8457294716fb8
---
 net/sched/sch_sfq.c | 56 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 60754f366ab7..68e909e8fabd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
 	struct red_parms *p = NULL;
 	struct sk_buff *to_free = NULL;
 	struct sk_buff *tail = NULL;
+	unsigned int maxflows;
+	unsigned int quantum;
+	unsigned int divisor;
+	int perturb_period;
+	u8 headdrop;
+	u8 maxdepth;
+	int limit;
+	u8 flags;
+
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
 		NL_SET_ERR_MSG_MOD(extack, "invalid limit");
 		return -EINVAL;
 	}
+
 	sch_tree_lock(sch);
+
+	limit = q->limit;
+	divisor = q->divisor;
+	headdrop = q->headdrop;
+	maxdepth = q->maxdepth;
+	maxflows = q->maxflows;
+	perturb_period = q->perturb_period;
+	quantum = q->quantum;
+	flags = q->flags;
+
+	/* update and validate configuration */
 	if (ctl->quantum)
-		q->quantum = ctl->quantum;
-	WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+		quantum = ctl->quantum;
+	perturb_period = ctl->perturb_period * HZ;
 	if (ctl->flows)
-		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+		maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
 	if (ctl->divisor) {
-		q->divisor = ctl->divisor;
-		q->maxflows = min_t(u32, q->maxflows, q->divisor);
+		divisor = ctl->divisor;
+		maxflows = min_t(u32, maxflows, divisor);
 	}
 	if (ctl_v1) {
 		if (ctl_v1->depth)
-			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+			maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
 		if (p) {
-			swap(q->red_parms, p);
-			red_set_parms(q->red_parms,
+			red_set_parms(p,
 				      ctl_v1->qth_min, ctl_v1->qth_max,
 				      ctl_v1->Wlog,
 				      ctl_v1->Plog, ctl_v1->Scell_log,
 				      NULL,
 				      ctl_v1->max_P);
 		}
-		q->flags = ctl_v1->flags;
-		q->headdrop = ctl_v1->headdrop;
+		flags = ctl_v1->flags;
+		headdrop = ctl_v1->headdrop;
 	}
 	if (ctl->limit) {
-		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
-		q->maxflows = min_t(u32, q->maxflows, q->limit);
+		limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+		maxflows = min_t(u32, maxflows, limit);
 	}
 
+	/* commit configuration */
+	q->limit = limit;
+	q->divisor = divisor;
+	q->headdrop = headdrop;
+	q->maxdepth = maxdepth;
+	q->maxflows = maxflows;
+	WRITE_ONCE(q->perturb_period, perturb_period);
+	q->quantum = quantum;
+	q->flags = flags;
+	if (p)
+		swap(q->red_parms, p);
+
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit) {
 		dropped += sfq_drop(sch, &to_free);

From 228e0f23bdeb774233bafc0383084518ec6f2ad1 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <tavip@google.com>
Date: Mon, 7 Apr 2025 13:24:08 -0700
Subject: [PATCH 23/44] UPSTREAM: net_sched: sch_sfq: move the limit validation

[ Upstream commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 ]

It is not sufficient to directly validate the limit on the data that
the user passes as it can be updated based on how the other parameters
are changed.

Move the check to the end of the configuration update process to also
catch scenarios where the limit is indirectly updated, for example
with the following configurations:

tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1
tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1

This fixes the following syzkaller reported crash:

------------[ cut here ]------------
UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6
index 65535 is out of range for type 'struct sfq_head[128]'
CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120
 ubsan_epilogue lib/ubsan.c:231 [inline]
 __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429
 sfq_link net/sched/sch_sfq.c:203 [inline]
 sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231
 sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493
 sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518
 qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
 tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339
 qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
 dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311
 netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline]
 dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375

Bug: 413623519
Reported-by: syzbot <syzkaller@googlegroups.com>
Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit")
Signed-off-by: Octavian Purdila <tavip@google.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit f86293adce0c201cfabb283ef9d6f21292089bb8)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Ie5fc222b52c59eaa1070cc03402f8a624af60cd9
---
 net/sched/sch_sfq.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 68e909e8fabd..002941d35b64 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
 		if (!p)
 			return -ENOMEM;
 	}
-	if (ctl->limit == 1) {
-		NL_SET_ERR_MSG_MOD(extack, "invalid limit");
-		return -EINVAL;
-	}
 
 	sch_tree_lock(sch);
 
@@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
 		limit = min_t(u32, ctl->limit, maxdepth * maxflows);
 		maxflows = min_t(u32, maxflows, limit);
 	}
+	if (limit == 1) {
+		sch_tree_unlock(sch);
+		kfree(p);
+		NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+		return -EINVAL;
+	}
 
 	/* commit configuration */
 	q->limit = limit;

From e30317e116ff3009a12abaa710008ff88f168188 Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Mon, 19 May 2025 11:10:04 -0700
Subject: [PATCH 24/44] ANDROID: 16K: Remove ELF padding entry from map_file
 ranges
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Symbolization techniques use address ranges as reported in /proc/*/maps
to infer the corresponding /proc/*/map_files/ entry.

Per Daniel, this is done because the path in /proc/*/maps is problematic
for at least two reasons:

    1. The file could have been deleted from the file system (this is
       indicated with the (deleted) suffix), meaning that you can't
       actually open it through the "regular" file system. However,
       while the mapping is alive, the kernel keeps the inode accessible
       via the corresponding /proc/*/map_files entry, allowing for
       access after all.

    2. It makes dealing with changed root and file system namespaces
       much more painful. The /proc/*/maps path is relative, and so now
       you need to concatenate paths etc. Accessing file through
       /proc/*/map_files just works (assuming necessary permissions), as
       the kernel redirects the request to the proper inode,
       irrespective of how it is exposed through the non-proc
       filesystem.

Android extends ELF padding regions to be contiguously mapped in memory
to mitigate increase in unreclaimable VMA slab memory usage.

Commit 8c2a805a857914324b077708b45c31c2f20d02da [1] emulates the padding
region of such extended mappings to be output as PROT_NONE
[page size compat] entries from /proc/*/[s]maps. This breaks the use
case of /proc/*/map_files/, as the ranges in /proc/*/map_files/ are
the true ranges of the actual underlying VMA layout, while those in
/proc/*/[s]maps are the emulated (shortened) ranges.

Remove the padding (extended) ranges from /proc/*/map_files entries.

====== Example Output ======

=== maps ===

❯ adb shell cat /proc/1/maps | grep -A1 libdl_android.so | sed '$d'

7f76663df000-7f76663e0000 r--p 00000000 fe:09 1911                       /system/lib64/bootstrap/libdl_android.so
7f76663e0000-7f76663e3000 ---p 00000000 00:00 0                          [page size compat]
7f76663e3000-7f76663e4000 r-xp 00004000 fe:09 1911                       /system/lib64/bootstrap/libdl_android.so
7f76663e4000-7f76663e7000 ---p 00000000 00:00 0                          [page size compat]
7f76663e7000-7f76663e8000 r--p 00008000 fe:09 1911                       /system/lib64/bootstrap/libdl_android.so

=== map_files - Before patch ===

❯ adb shell ls /proc/1/map_files | grep -A2 7f76663df000

7f76663df000-7f76663e3000
7f76663e3000-7f76663e7000
7f76663e7000-7f76663e8000

=== map_files - After patch ===

❯ adb shell ls /proc/1/map_files | grep -A2 7f76663df000

7f76663df000-7f76663e0000
7f76663e3000-7f76663e4000
7f76663e7000-7f76663e8000

[1] https://android.googlesource.com/kernel/common/+/8c2a805a857914324b077708b45c31c2f20d02da

Bug: 418042003
Change-Id: I0f6d703715a0e709fa1d4bd52241b5fd913dd55e
Reported-by: Daniel Müller <deso@posteo.net>
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
---
 fs/proc/base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 77b3b1efe43d..bf14f94df793 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -65,6 +65,7 @@
 #include <linux/namei.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mm.h>
+#include <linux/pgsize_migration.h>
 #include <linux/swap.h>
 #include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
@@ -2476,7 +2477,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 		}
 
 		p->start = vma->vm_start;
-		p->end = vma->vm_end;
+		p->end = VMA_PAD_START(vma);
 		p->mode = vma->vm_file->f_mode;
 	}
 	mmap_read_unlock(mm);

From ad7902a401f68e107b74c3543650798f454740b2 Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Fri, 9 May 2025 10:09:12 +1200
Subject: [PATCH 25/44] BACKPORT: mm: userfaultfd: correct dirty flags set for
 both present and swap pte
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As David pointed out, what truly matters for mremap and userfaultfd move
operations is the soft dirty bit.  The current comment and
implementation—which always sets the dirty bit for present PTEs and
fails to set the soft dirty bit for swap PTEs—are incorrect.  This could
break features like Checkpoint-Restore in Userspace (CRIU).

This patch updates the behavior to correctly set the soft dirty bit for
both present and swap PTEs in accordance with mremap.

Link: https://lkml.kernel.org/r/20250508220912.7275-1-21cnbao@gmail.com
Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reported-by: David Hildenbrand <david@redhat.com>
Closes: https://lore.kernel.org/linux-mm/02f14ee1-923f-47e3-a994-4950afb9afcc@redhat.com/
Acked-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 75cb1cca2c880179a11c7dd9380b6f14e41a06a4)

Merge Conflicts:
1. pte_mkwrite() doesn't take vma as second argument, so removed it.

Change-Id: I5fc25f9028ad7972ea1b6d873f072fd15f9c7214
Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
---
 mm/userfaultfd.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index b45edacc7436..468747538b41 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -966,8 +966,13 @@ static int move_present_pte(struct mm_struct *mm,
 	WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
 
 	orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
-	/* Follow mremap() behavior and treat the entry dirty after the move */
-	orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte));
+	/* Set soft dirty bit so userspace can notice the pte was moved */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
+#endif
+	if (pte_dirty(orig_src_pte))
+		orig_dst_pte = pte_mkdirty(orig_dst_pte);
+	orig_dst_pte = pte_mkwrite(orig_dst_pte);
 
 	set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte);
 out:
@@ -1001,6 +1006,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
 	}
 
 	orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+#endif
 	set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
 	double_pt_unlock(dst_ptl, src_ptl);
 

From 58b3f63bc69f12b923960b99683f6af8527a82f7 Mon Sep 17 00:00:00 2001
From: Marcus Ma <maminghui5@xiaomi.corp-partner.google.com>
Date: Mon, 19 May 2025 21:04:39 +0800
Subject: [PATCH 26/44] ANDROID: vendor_hooks: Add hooks for pcp related
 optimization.

We want to make some optimizations to the per-cpu pages (pcp) buffer, so
add two vendor hooks:

1. android_vh_drain_all_pages_bypass: during direct reclaim, allow
   skipping drain_all_pages() when it is known that the pcp buffer is
   small, to reduce zone->lock contention.

2. android_vh_pageset_update: the default pcp buffer size is still
   relatively small for mobile phones with large memory, so allow the
   pcp high and batch values to be increased, again to reduce
   zone->lock contention.

Bug: 418695654

Change-Id: I38c7a3715500918d839e4363bbcc41cdbf4bd643
Signed-off-by: Marcus Ma <maminghui5@xiaomi.corp-partner.google.com>
---
 drivers/android/vendor_hooks.c | 2 ++
 include/trace/hooks/mm.h       | 8 ++++++++
 mm/page_alloc.c                | 7 ++++++-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c
index 77dd1eb1b1f3..84184cc4a83e 100644
--- a/drivers/android/vendor_hooks.c
+++ b/drivers/android/vendor_hooks.c
@@ -481,3 +481,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_filemap_map_pages_range);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_vprintk_store);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_folio_referenced_check_bypass);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_calculate_totalreserve_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_drain_all_pages_bypass);
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_pageset_update);
diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h
index fdbdd8080370..0e0403625dbf 100644
--- a/include/trace/hooks/mm.h
+++ b/include/trace/hooks/mm.h
@@ -315,6 +315,14 @@ DECLARE_HOOK(android_vh_filemap_map_pages_range,
 DECLARE_HOOK(android_vh_calculate_totalreserve_pages,
 	TP_PROTO(bool *skip),
 	TP_ARGS(skip));
+DECLARE_HOOK(android_vh_drain_all_pages_bypass,
+	TP_PROTO(gfp_t gfp_mask, unsigned int order, unsigned long alloc_flags,
+		int migratetype, unsigned long did_some_progress,
+		bool *bypass),
+	TP_ARGS(gfp_mask, order, alloc_flags, migratetype, did_some_progress, bypass));
+DECLARE_HOOK(android_vh_pageset_update,
+	TP_PROTO(unsigned long *high, unsigned long *batch),
+	TP_ARGS(high, batch));
 #endif /* _TRACE_HOOK_MM_H */
 
 /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ffc6abc47849..a1605834867e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5026,6 +5026,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned long pflags;
 	bool drained = false;
+	bool skip_pcp_drain = false;
 
 	trace_android_vh_mm_alloc_pages_direct_reclaim_enter(order);
 	psi_memstall_enter(&pflags);
@@ -5043,7 +5044,10 @@ retry:
 	 */
 	if (!page && !drained) {
 		unreserve_highatomic_pageblock(ac, false);
-		drain_all_pages(NULL);
+		trace_android_vh_drain_all_pages_bypass(gfp_mask, order,
+			alloc_flags, ac->migratetype, *did_some_progress, &skip_pcp_drain);
+		if (!skip_pcp_drain)
+			drain_all_pages(NULL);
 		drained = true;
 		++retry_times;
 		goto retry;
@@ -7475,6 +7479,7 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online)
 static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 		unsigned long batch)
 {
+	trace_android_vh_pageset_update(&high, &batch);
 	WRITE_ONCE(pcp->batch, batch);
 	WRITE_ONCE(pcp->high, high);
 }

From b07be5e5113bbf7168faf3876d3fff08fa4789ed Mon Sep 17 00:00:00 2001
From: Marcus Ma <maminghui5@xiaomi.corp-partner.google.com>
Date: Mon, 19 May 2025 22:34:49 +0800
Subject: [PATCH 27/44] ANDROID: GKI: update symbol list file for xiaomi

Add symbols for 2 hook functions:
	trace_android_vh_drain_all_pages_bypass()
	trace_android_vh_pageset_update()

Bug: 418695654

Change-Id: Id1bbb269b7650528dcb2dfac29e7a611154954b3
Signed-off-by: Marcus Ma <maminghui5@xiaomi.corp-partner.google.com>
---
 android/abi_gki_aarch64.stg               | 51 +++++++++++++++++++++++
 android/abi_gki_aarch64_xiaomi            |  5 +++
 android/abi_gki_protected_exports_aarch64 |  2 +-
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg
index 97585255cc27..24898f7746c4 100644
--- a/android/abi_gki_aarch64.stg
+++ b/android/abi_gki_aarch64.stg
@@ -322851,6 +322851,17 @@ function {
   parameter_id: 0x4585663f
   parameter_id: 0x33756485
 }
+function {
+  id: 0x98789491
+  return_type_id: 0x6720d32f
+  parameter_id: 0x18bd6530
+  parameter_id: 0xf1a6dfed
+  parameter_id: 0x4585663f
+  parameter_id: 0x33756485
+  parameter_id: 0x6720d32f
+  parameter_id: 0x33756485
+  parameter_id: 0x11cfee5a
+}
 function {
   id: 0x98792c3d
   return_type_id: 0x3e10b518
@@ -348507,6 +348518,15 @@ elf_symbol {
   type_id: 0x9b2a7922
   full_name: "__traceiter_android_vh_do_wp_page"
 }
+elf_symbol {
+  id: 0xf80eb64b
+  name: "__traceiter_android_vh_drain_all_pages_bypass"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0xe9b2635d
+  type_id: 0x98789491
+  full_name: "__traceiter_android_vh_drain_all_pages_bypass"
+}
 elf_symbol {
   id: 0x42312ccc
   name: "__traceiter_android_vh_dump_throttled_rt_tasks"
@@ -349470,6 +349490,15 @@ elf_symbol {
   type_id: 0x9b26096d
   full_name: "__traceiter_android_vh_page_should_be_protected"
 }
+elf_symbol {
+  id: 0x1fc96009
+  name: "__traceiter_android_vh_pageset_update"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0xf5c57c7a
+  type_id: 0x9bb71cb9
+  full_name: "__traceiter_android_vh_pageset_update"
+}
 elf_symbol {
   id: 0x13b0736e
   name: "__traceiter_android_vh_percpu_rwsem_down_read"
@@ -353610,6 +353639,15 @@ elf_symbol {
   type_id: 0x18ccbd2c
   full_name: "__tracepoint_android_vh_do_wp_page"
 }
+elf_symbol {
+  id: 0x8405c9a1
+  name: "__tracepoint_android_vh_drain_all_pages_bypass"
+  is_defined: true
+  symbol_type: OBJECT
+  crc: 0x786ed430
+  type_id: 0x18ccbd2c
+  full_name: "__tracepoint_android_vh_drain_all_pages_bypass"
+}
 elf_symbol {
   id: 0x988719fa
   name: "__tracepoint_android_vh_dump_throttled_rt_tasks"
@@ -354573,6 +354611,15 @@ elf_symbol {
   type_id: 0x18ccbd2c
   full_name: "__tracepoint_android_vh_page_should_be_protected"
 }
+elf_symbol {
+  id: 0x94cb1cab
+  name: "__tracepoint_android_vh_pageset_update"
+  is_defined: true
+  symbol_type: OBJECT
+  crc: 0x644520c9
+  type_id: 0x18ccbd2c
+  full_name: "__tracepoint_android_vh_pageset_update"
+}
 elf_symbol {
   id: 0xa4c454d8
   name: "__tracepoint_android_vh_percpu_rwsem_down_read"
@@ -419247,6 +419294,7 @@ interface {
   symbol_id: 0x54bc5972
   symbol_id: 0x9dbd7b92
   symbol_id: 0x2576f1c7
+  symbol_id: 0xf80eb64b
   symbol_id: 0x42312ccc
   symbol_id: 0xf432d1c9
   symbol_id: 0x02c8f91b
@@ -419354,6 +419402,7 @@ interface {
   symbol_id: 0xacaadcc9
   symbol_id: 0x3246acbb
   symbol_id: 0xb4d5ffdc
+  symbol_id: 0x1fc96009
   symbol_id: 0x13b0736e
   symbol_id: 0xc72f2012
   symbol_id: 0xd14f3adb
@@ -419814,6 +419863,7 @@ interface {
   symbol_id: 0xeb9f1c78
   symbol_id: 0xe2d7542c
   symbol_id: 0x15374b6d
+  symbol_id: 0x8405c9a1
   symbol_id: 0x988719fa
   symbol_id: 0x732a182b
   symbol_id: 0xe5deb919
@@ -419921,6 +419971,7 @@ interface {
   symbol_id: 0x20d2ceb3
   symbol_id: 0x4a5e6e41
   symbol_id: 0x352038ba
+  symbol_id: 0x94cb1cab
   symbol_id: 0xa4c454d8
   symbol_id: 0x7d42b7c8
   symbol_id: 0x3d63616d
diff --git a/android/abi_gki_aarch64_xiaomi b/android/abi_gki_aarch64_xiaomi
index da618470dc5f..2ebc76e2121d 100644
--- a/android/abi_gki_aarch64_xiaomi
+++ b/android/abi_gki_aarch64_xiaomi
@@ -524,3 +524,8 @@ __tracepoint_android_vh_filemap_map_pages_range
 #required by rtase.ko
 proc_get_parent_data
 netdev_stats_to_stats64
+
+__traceiter_android_vh_drain_all_pages_bypass
+__tracepoint_android_vh_drain_all_pages_bypass
+__traceiter_android_vh_pageset_update
+__tracepoint_android_vh_pageset_update
diff --git a/android/abi_gki_protected_exports_aarch64 b/android/abi_gki_protected_exports_aarch64
index c9f44faef737..c2cc936669e5 100644
--- a/android/abi_gki_protected_exports_aarch64
+++ b/android/abi_gki_protected_exports_aarch64
@@ -351,4 +351,4 @@ wwan_port_txoff
 wwan_port_txon
 wwan_register_ops
 wwan_remove_port
-wwan_unregister_ops
+wwan_unregister_ops
\ No newline at end of file

From d653b32842d8960e6504293790ab29d25ed4fa88 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 12 May 2025 15:57:22 +0100
Subject: [PATCH 28/44] Revert "ANDROID: KVM: arm64: Use enum instead of helper
 for fp state"

This reverts commit 26d24625b310b48b0d671075e02117624f7110d4, which
didn't introduce any functional change. This is reverted because
backported commits rely on the helpers that the commit has removed.
Reverting it makes it easier and cleaner to apply the backports.

No functional change intended.

Bug: 411040189
Change-Id: Ie29ece274cfc970cf116f8781b841b9ac2c5aa56
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ++++++
 arch/arm64/kvm/hyp/nvhe/switch.c        | 2 +-
 arch/arm64/kvm/hyp/vhe/switch.c         | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 89f7a56dac73..3999a372078a 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -37,6 +37,12 @@ struct kvm_exception_table_entry {
 extern struct kvm_exception_table_entry __start___kvm_ex_table;
 extern struct kvm_exception_table_entry __stop___kvm_ex_table;
 
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
+}
+
 /* Save the 32-bit only FPSIMD system register state */
 static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 2363f862abc9..b96612fca21d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -45,7 +45,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	val = vcpu->arch.cptr_el2;
 	val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
-	if (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED) {
+	if (!guest_owns_fp_regs(vcpu)) {
 		val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
 		__activate_traps_fpsimd32(vcpu);
 	}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 91211f86fec2..747e4eeaab59 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	val |= CPTR_EL2_TAM;
 
-	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+	if (guest_owns_fp_regs(vcpu)) {
 		if (vcpu_has_sve(vcpu))
 			val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 	} else {

From c3b505e78c572d582c22437ae10fb914662fffab Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 12 May 2025 16:17:40 +0100
Subject: [PATCH 29/44] ANDROID: KVM: arm64: Remove pkvm_set_max_sve_vq()

This function doesn't encapsulate that much code, and removing it makes
backporting SVE-fix patches easier and cleaner.

No functional change intended.

Bug: 411040189
Change-Id: I27b3fe467b1896a393751349b86771ddbb1bd62b
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 6 ------
 arch/arm64/kvm/hyp/nvhe/hyp-main.c     | 3 ++-
 arch/arm64/kvm/hyp/nvhe/switch.c       | 3 ++-
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 943cf7fc7124..00526b8863e8 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -149,12 +149,6 @@ static inline bool pkvm_ipa_range_has_pvmfw(struct pkvm_hyp_vm *vm,
 	return ipa_end > pkvm->pvmfw_load_addr && ipa_start < pvmfw_load_end;
 }
 
-static inline void pkvm_set_max_sve_vq(void)
-{
-	sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
-			       SYS_ZCR_EL2);
-}
-
 int pkvm_load_pvmfw_pages(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t phys,
 			  u64 size);
 void pkvm_poison_pvmfw_pages(void);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 931152dc3aa8..ff993f0c8705 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -696,7 +696,8 @@ static void fpsimd_host_restore(void)
 			struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
 
 			write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
-			pkvm_set_max_sve_vq();
+			sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
+					       SYS_ZCR_EL2);
 			__sve_restore_state(sve_state->sve_regs +
 					    sve_ffr_offset(kvm_host_sve_max_vl),
 					    &sve_state->fpsr);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index b96612fca21d..e75794c40e39 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -201,7 +201,8 @@ static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
 		struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
 
 		sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
-		pkvm_set_max_sve_vq();
+		sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
+				       SYS_ZCR_EL2);
 		__sve_save_state(sve_state->sve_regs +
 					 sve_ffr_offset(kvm_host_sve_max_vl),
 				 &sve_state->fpsr);

From 1b3dfc7c3845207142f928885d4dda598f4eac02 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Wed, 7 May 2025 12:40:11 +0100
Subject: [PATCH 30/44] ANDROID: KVM: arm64: Move kvm_hyp_handle_fpsimd_host()
 to switch.h

Move kvm_hyp_handle_fpsimd_host() to the shared switch header, instead
of having separate implementations in the vhe/nvhe switch.c files.
Subsequent patches will remove all specific implementations from
switch.c and include switch.h in other files.

Bug: 411040189
Change-Id: I07f1d92f96b072435ded5f0b84a446df4e6a81ab
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/kvm_hyp.h        |  3 +++
 arch/arm64/kvm/hyp/include/hyp/switch.h | 26 ++++++++++++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/switch.c        | 25 ------------------------
 arch/arm64/kvm/hyp/vhe/switch.c         |  5 -----
 4 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 861049a6416f..5f16c6f800a6 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -121,6 +121,9 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
 #ifdef __KVM_NVHE_HYPERVISOR__
 struct user_fpsimd_state *get_host_fpsimd_state(struct kvm_vcpu *vcpu);
 struct kvm_host_sve_state *get_host_sve_state(struct kvm_vcpu *vcpu);
+#else
+#define get_host_fpsimd_state(vcpu) (vcpu)->arch.host_fpsimd_state
+#define get_host_sve_state(vcpu) NULL
 #endif
 
 extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 3999a372078a..efcfd44e7012 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -26,6 +26,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_pkvm.h>
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
 #include <asm/processor.h>
@@ -167,7 +168,30 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }
 
-static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu);
+static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Non-protected kvm relies on the host restoring its sve state.
+	 * Protected kvm restores the host's sve state as not to reveal that
+	 * fpsimd was used by a guest nor leak upper sve bits.
+	 */
+	if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+		struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
+
+		sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+		sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
+				       SYS_ZCR_EL2);
+		__sve_save_state(sve_state->sve_regs +
+					 sve_ffr_offset(kvm_host_sve_max_vl),
+				 &sve_state->fpsr);
+
+		/* Still trap SVE since it's handled by hyp in pKVM. */
+		if (!vcpu_has_sve(vcpu))
+			sysreg_clear_set(cptr_el2, 0, CPTR_EL2_TZ);
+	} else {
+		__fpsimd_save_state(get_host_fpsimd_state(vcpu));
+	}
+}
 
 static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e75794c40e39..8240ae1ebeb3 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -190,31 +190,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
 		kvm_handle_pvm_sysreg(vcpu, exit_code));
 }
 
-static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * Non-protected kvm relies on the host restoring its sve state.
-	 * Protected kvm restores the host's sve state as not to reveal that
-	 * fpsimd was used by a guest nor leak upper sve bits.
-	 */
-	if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
-		struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
-
-		sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
-		sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
-				       SYS_ZCR_EL2);
-		__sve_save_state(sve_state->sve_regs +
-					 sve_ffr_offset(kvm_host_sve_max_vl),
-				 &sve_state->fpsr);
-
-		/* Still trap SVE since it's handled by hyp in pKVM. */
-		if (!vcpu_has_sve(vcpu))
-			sysreg_clear_set(cptr_el2, 0, CPTR_EL2_TZ);
-	} else {
-		__fpsimd_save_state(get_host_fpsimd_state(vcpu));
-	}
-}
-
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 747e4eeaab59..b9c8cd61ec7a 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -112,11 +112,6 @@ static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu)
 	sysreg_clear_set(cpacr_el1, 0, reg);
 }
 
-static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
-{
-	__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-}
-
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,

From d871a6444c978a769e7b97c71e5b1a1ad843eacc Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Thu, 1 May 2025 13:21:50 +0100
Subject: [PATCH 31/44] ANDROID: KVM: arm64: Move __deactivate_fpsimd_traps()
 to switch.h

Move __deactivate_fpsimd_traps() to the shared switch header, instead of
having separate implementations in the vhe/nvhe switch.c files.
Subsequent patches will remove all specific implementations from
switch.c and include switch.h in other files.

Bug: 411040189
Change-Id: I42c545e939b230366fbd9ad8e41a614193169bce
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 22 +++++++++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/switch.c        | 12 ------------
 arch/arm64/kvm/hyp/vhe/switch.c         | 10 ----------
 3 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index efcfd44e7012..087a3cef4116 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -193,7 +193,27 @@ static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu);
+static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu)
+{
+	if (has_vhe()) {
+		u64 reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
+
+		if (vcpu_has_sve(vcpu))
+			reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+
+		sysreg_clear_set(cpacr_el1, 0, reg);
+
+	} else {
+		u64 reg = CPTR_EL2_TFP;
+
+		if (vcpu_has_sve(vcpu) ||
+		    (is_protected_kvm_enabled() && system_supports_sve())) {
+			reg |= CPTR_EL2_TZ;
+		}
+
+		sysreg_clear_set(cptr_el2, reg, 0);
+	}
+}
 
 /*
  * We trap the first access to the FP/SIMD to save the host context and
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8240ae1ebeb3..f1890454628d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -108,18 +108,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 	write_sysreg(__kvm_hyp_host_vector, vbar_el2);
 }
 
-static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu)
-{
-	u64 reg = CPTR_EL2_TFP;
-
-	if (vcpu_has_sve(vcpu) ||
-	    (is_protected_kvm_enabled() && system_supports_sve())) {
-		reg |= CPTR_EL2_TZ;
-	}
-
-	sysreg_clear_set(cptr_el2, reg, 0);
-}
-
 /* Save VGICv3 state on non-VHE systems */
 static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index b9c8cd61ec7a..45ac4a59cc2c 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -102,16 +102,6 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
 	__deactivate_traps_common(vcpu);
 }
 
-static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu)
-{
-	u64 reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
-
-	if (vcpu_has_sve(vcpu))
-		reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
-	sysreg_clear_set(cpacr_el1, 0, reg);
-}
-
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,

From 21c687a8c532784d133469ba9eb5491271b1bd04 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Thu, 1 May 2025 10:21:31 +0100
Subject: [PATCH 32/44] ANDROID: KVM: arm64: Eagerly restore host FPSIMD/SVE
 state in pKVM

Eagerly restore the host fpsimd/sve state after every vcpu run in
protected mode if the fpsimd/sve unit was used by the guest, instead of
setting fpsimd/sve traps and restoring if the host triggers them.

Note that the behavior with this patch is the existing behavior in
Android 16 (except for restoring ZCR_EL2, which is being fixed in
conjunction with this patch there as well).

Bug: 411040189
Change-Id: I5702590331093937c1cd0d08ac754c634054c7f7
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/nvhe/hyp-main.c | 100 +++++++++++------------------
 1 file changed, 38 insertions(+), 62 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index ff993f0c8705..09bd468dfadb 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
 
+static void fpsimd_host_restore(struct kvm_vcpu *vcpu);
+
 static bool (*default_host_smc_handler)(struct kvm_cpu_context *host_ctxt);
 static bool (*default_trap_handler)(struct kvm_cpu_context *host_ctxt);
 
@@ -580,6 +582,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 	hyp_entry_exit_handler_fn ec_handler;
 	u8 esr_ec;
 
+	hyp_vcpu->vcpu.arch.fp_state = FP_STATE_HOST_OWNED;
+
 	/*
 	 * If we deal with a non-protected guest and the state is potentially
 	 * dirty (from a host perspective), copy the state back into the hyp
@@ -666,50 +670,48 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, u32 exit_reason)
 	else
 		host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
 
+	if (hyp_vcpu->vcpu.arch.fp_state != FP_STATE_HOST_OWNED)
+		fpsimd_host_restore(&hyp_vcpu->vcpu);
+
 	hyp_vcpu->exit_code = exit_reason;
 }
 
-static void __hyp_sve_save_guest(struct pkvm_hyp_vcpu *hyp_vcpu)
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
-
 	__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
 	__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
 	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL1);
 }
 
-static void fpsimd_host_restore(void)
+static void fpsimd_host_restore(struct kvm_vcpu *vcpu)
 {
-	sysreg_clear_set(cptr_el2, CPTR_EL2_TZ | CPTR_EL2_TFP, 0);
-	isb();
-
-	if (unlikely(is_protected_kvm_enabled())) {
-		struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
-		struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
-
-		if (vcpu_has_sve(vcpu))
-			__hyp_sve_save_guest(hyp_vcpu);
-		else
-			__fpsimd_save_state(&hyp_vcpu->vcpu.arch.ctxt.fp_regs);
-
-		if (system_supports_sve()) {
-			struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
-
-			write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
-			sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
-					       SYS_ZCR_EL2);
-			__sve_restore_state(sve_state->sve_regs +
-					    sve_ffr_offset(kvm_host_sve_max_vl),
-					    &sve_state->fpsr);
-		} else {
-			__fpsimd_restore_state(get_host_fpsimd_state(vcpu));
-		}
-
-		hyp_vcpu->vcpu.arch.fp_state = FP_STATE_HOST_OWNED;
-	}
+	u64 reg = CPTR_EL2_TFP;
 
 	if (system_supports_sve())
-		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+		reg |= CPTR_EL2_TZ;
+
+	sysreg_clear_set(cptr_el2, reg, 0);
+	isb();
+
+	if (vcpu_has_sve(vcpu))
+		__hyp_sve_save_guest(vcpu);
+	else
+		__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+	if (system_supports_sve()) {
+		struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
+
+		write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+		sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
+					SYS_ZCR_EL2);
+		__sve_restore_state(sve_state->sve_regs +
+					sve_ffr_offset(kvm_host_sve_max_vl),
+					&sve_state->fpsr);
+	} else {
+		__fpsimd_restore_state(get_host_fpsimd_state(vcpu));
+	}
+
+	vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
 }
 
 static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
@@ -740,8 +742,6 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
 		*last_ran = hyp_vcpu->vcpu.vcpu_id;
 	}
 
-	hyp_vcpu->vcpu.arch.fp_state = FP_STATE_HOST_OWNED;
-
 	if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
 		/* Propagate WFx trapping flags, trap ptrauth */
 		hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI |
@@ -761,9 +761,6 @@ static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
 	if (hyp_vcpu) {
 		struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
 
-		if (hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED)
-			fpsimd_host_restore();
-
 		if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu) &&
 		    !vcpu_get_flag(host_vcpu, PKVM_HOST_STATE_DIRTY)) {
 			__sync_hyp_vcpu(hyp_vcpu);
@@ -784,9 +781,6 @@ static void handle___pkvm_vcpu_sync_state(struct kvm_cpu_context *host_ctxt)
 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
 		return;
 
-	if (hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED)
-		fpsimd_host_restore();
-
 	__sync_hyp_vcpu(hyp_vcpu);
 }
 
@@ -849,23 +843,8 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 			goto out;
 
 		flush_hyp_vcpu(hyp_vcpu);
-
 		ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
-
 		sync_hyp_vcpu(hyp_vcpu, ret);
-
-		if (hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED) {
-			/*
-			 * The guest has used the FP, trap all accesses
-			 * from the host (both FP and SVE).
-			 */
-			u64 reg = CPTR_EL2_TFP;
-
-			if (system_supports_sve())
-				reg |= CPTR_EL2_TZ;
-
-			sysreg_clear_set(cptr_el2, 0, reg);
-		}
 	} else {
 		/* The host is fully trusted, run its vCPU directly. */
 		ret = __kvm_vcpu_run(host_vcpu);
@@ -1383,13 +1362,8 @@ inval:
 static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(u64, func_id, host_ctxt, 0);
-	struct pkvm_hyp_vcpu *hyp_vcpu;
 	bool handled;
 
-	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
-	if (hyp_vcpu && hyp_vcpu->vcpu.arch.fp_state == FP_STATE_GUEST_OWNED)
-		fpsimd_host_restore();
-
 	handled = kvm_host_psci_handler(host_ctxt);
 	if (!handled)
 		handled = kvm_host_ffa_handler(host_ctxt);
@@ -1421,9 +1395,11 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
 	case ESR_ELx_EC_SMC64:
 		handle_host_smc(host_ctxt);
 		break;
-	case ESR_ELx_EC_FP_ASIMD:
 	case ESR_ELx_EC_SVE:
-		fpsimd_host_restore();
+		BUG_ON(is_protected_kvm_enabled());
+		sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+		isb();
+		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
 		break;
 	case ESR_ELx_EC_IABT_LOW:
 	case ESR_ELx_EC_DABT_LOW:

From f1df93017ee768fda390c733eb15d0cb858e332e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 15 Nov 2022 09:46:33 +0000
Subject: [PATCH 33/44] BACKPORT: KVM: arm64: Discard any SVE state when
 entering KVM guests

[ Upstream commit 93ae6b01bafee8fa385aa25ee7ebdb40057f6abe ]

Since 8383741ab2e773a99 (KVM: arm64: Get rid of host SVE tracking/saving)
KVM has not tracked the host SVE state, relying on the fact that we
currently disable SVE whenever we perform a syscall. This may not be true
in future since performance optimisation may result in us keeping SVE
enabled in order to avoid needing to take access traps to reenable it.
Handle this by clearing TIF_SVE and converting the stored task state to
FPSIMD format when preparing to run the guest.  This is done with a new
call fpsimd_kvm_prepare() to keep the direct state manipulation
functions internal to fpsimd.c.

Change-Id: Ie011c8f17dfebd82f796aaaa62d1502a3207c7db
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221115094640.112848-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
[ Mark: trivial backport to v6.1 ]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/fpsimd.h |  1 +
 arch/arm64/kernel/fpsimd.c      | 23 +++++++++++++++++++++++
 arch/arm64/kvm/fpsimd.c         |  3 ++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 930b0e6c9462..3544dfcc67a1 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -56,6 +56,7 @@ extern void fpsimd_signal_preserve_current_state(void);
 extern void fpsimd_preserve_current_state(void);
 extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
+extern void fpsimd_kvm_prepare(void);
 
 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
 				     void *sve_state, unsigned int sve_vl,
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 43afe07c74fd..1dc4254a99f2 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1643,6 +1643,29 @@ void fpsimd_signal_preserve_current_state(void)
 		sve_to_fpsimd(current);
 }
 
+/*
+ * Called by KVM when entering the guest.
+ */
+void fpsimd_kvm_prepare(void)
+{
+	if (!system_supports_sve())
+		return;
+
+	/*
+	 * KVM does not save host SVE state since we can only enter
+	 * the guest from a syscall so the ABI means that only the
+	 * non-saved SVE state needs to be saved.  If we have left
+	 * SVE enabled for performance reasons then update the task
+	 * state to be FPSIMD only.
+	 */
+	get_cpu_fpsimd_context();
+
+	if (test_and_clear_thread_flag(TIF_SVE))
+		sve_to_fpsimd(current);
+
+	put_cpu_fpsimd_context();
+}
+
 /*
  * Associate current's FPSIMD context with this cpu
  * The caller must have ownership of the cpu FPSIMD context before calling
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 453c6a541d87..a42d676466c5 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -52,11 +52,12 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(!current->mm);
-	BUG_ON(test_thread_flag(TIF_SVE));
 
 	if (!system_supports_fpsimd())
 		return;
 
+	fpsimd_kvm_prepare();
+
 	vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
 
 	vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);

From 12921b6e2348899199063e4e3e029e117f2b85b8 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:19 +0000
Subject: [PATCH 34/44] BACKPORT: KVM: arm64: Unconditionally save+flush host
 FPSIMD/SVE/SME state

[ Upstream commit fbc7e61195e23f744814e78524b73b59faa54ab4 ]

There are several problems with the way hyp code lazily saves the host's
FPSIMD/SVE state, including:

* Host SVE being discarded unexpectedly due to inconsistent
  configuration of TIF_SVE and CPACR_ELx.ZEN. This has been seen to
  result in QEMU crashes where SVE is used by memmove(), as reported by
  Eric Auger:

  https://issues.redhat.com/browse/RHEL-68997

* Host SVE state is discarded *after* modification by ptrace, which was an
  unintentional ptrace ABI change introduced with lazy discarding of SVE state.

* The host FPMR value can be discarded when running a non-protected VM,
  where FPMR support is not exposed to a VM, and that VM uses
  FPSIMD/SVE. In these cases the hyp code does not save the host's FPMR
  before unbinding the host's FPSIMD/SVE/SME state, leaving a stale
  value in memory.

Avoid these by eagerly saving and "flushing" the host's FPSIMD/SVE/SME
state when loading a vCPU such that KVM does not need to save any of the
host's FPSIMD/SVE/SME state. For clarity, fpsimd_kvm_prepare() is
removed and the necessary call to fpsimd_save_and_flush_cpu_state() is
placed in kvm_arch_vcpu_load_fp(). As 'fpsimd_state' and 'fpmr_ptr'
should not be used, they are set to NULL; all uses of these will be
removed in subsequent patches.

Historical problems go back at least as far as v5.17, e.g. erroneous
assumptions about TIF_SVE being clear in commit:

  8383741ab2e773a9 ("KVM: arm64: Get rid of host SVE tracking/saving")

... and so this eager save+flush probably needs to be backported to ALL
stable trees.

Bug: 411040189
Fixes: 93ae6b01bafee8fa ("KVM: arm64: Discard any SVE state when entering KVM guests")
Fixes: 8c845e2731041f0f ("arm64/sve: Leave SVE enabled on syscall if we don't context switch")
Fixes: ef3be86021c3bdf3 ("KVM: arm64: Add save/restore support for FPMR")
Reported-by: Eric Auger <eauger@redhat.com>
Reported-by: Wilco Dijkstra <wilco.dijkstra@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Tested-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Change-Id: I2c230b8db86f5c68ebf24f06d1e4787da284c80d
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-2-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
[ Mark: Handle vcpu/host flag conflict, remove host_data_ptr() ]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kernel/fpsimd.c | 23 -----------------------
 arch/arm64/kvm/fpsimd.c    | 18 ++++++++++--------
 2 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 1dc4254a99f2..43afe07c74fd 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1643,29 +1643,6 @@ void fpsimd_signal_preserve_current_state(void)
 		sve_to_fpsimd(current);
 }
 
-/*
- * Called by KVM when entering the guest.
- */
-void fpsimd_kvm_prepare(void)
-{
-	if (!system_supports_sve())
-		return;
-
-	/*
-	 * KVM does not save host SVE state since we can only enter
-	 * the guest from a syscall so the ABI means that only the
-	 * non-saved SVE state needs to be saved.  If we have left
-	 * SVE enabled for performance reasons then update the task
-	 * state to be FPSIMD only.
-	 */
-	get_cpu_fpsimd_context();
-
-	if (test_and_clear_thread_flag(TIF_SVE))
-		sve_to_fpsimd(current);
-
-	put_cpu_fpsimd_context();
-}
-
 /*
  * Associate current's FPSIMD context with this cpu
  * The caller must have ownership of the cpu FPSIMD context before calling
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index a42d676466c5..43bec87ee74d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -56,9 +56,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	if (!system_supports_fpsimd())
 		return;
 
-	fpsimd_kvm_prepare();
-
-	vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
+	/*
+	 * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
+	 * that the host kernel is responsible for restoring this state upon
+	 * return to userspace, and the hyp code doesn't need to save anything.
+	 *
+	 * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
+	 * that PSTATE.{SM,ZA} == {0,0}.
+	 */
+	fpsimd_save_and_flush_cpu_state();
+	vcpu->arch.fp_state = FP_STATE_FREE;
 
 	vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
 	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -77,11 +84,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 		vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
 		if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
 			vcpu_set_flag(vcpu, HOST_SME_ENABLED);
-
-		if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
-			vcpu->arch.fp_state = FP_STATE_FREE;
-			fpsimd_save_and_flush_cpu_state();
-		}
 	}
 }
 

From a08391468f2fcc251ecac861d59a904a65018d64 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:20 +0000
Subject: [PATCH 35/44] BACKPORT: KVM: arm64: Remove host FPSIMD saving for
 non-protected KVM

[ Upstream commit 8eca7f6d5100b6997df4f532090bc3f7e0203bef ]

Now that the host eagerly saves its own FPSIMD/SVE/SME state,
non-protected KVM never needs to save the host FPSIMD/SVE/SME state,
and the code to do this is never used. Protected KVM still needs to
save/restore the host FPSIMD/SVE state to avoid leaking guest state to
the host (and to avoid revealing to the host whether the guest used
FPSIMD/SVE/SME), and that code needs to be retained.

Remove the unused code and data structures.

To avoid the need for a stub copy of kvm_hyp_save_fpsimd_host() in the
VHE hyp code, the nVHE/hVHE version is moved into the shared switch
header, where it is only invoked when KVM is in protected mode.

[tabba@ Kept the user_fpsimd_state pointer (host_fpsimd_state) so as not to break the KMI.]

Bug: 411040189
Change-Id: I0088db7c5f75c9331956867040b8eb69976aabf8
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-3-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/kvm_host.h       | 3 ++-
 arch/arm64/include/asm/kvm_hyp.h        | 2 +-
 arch/arm64/kvm/fpsimd.c                 | 2 --
 arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++--
 arch/arm64/kvm/hyp/nvhe/pkvm.c          | 3 ---
 5 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1ad2a045f8e7..b894dc38e205 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -488,7 +488,8 @@ struct kvm_vcpu_arch {
 	struct kvm_guest_debug_arch vcpu_debug_state;
 	struct kvm_guest_debug_arch external_debug_state;
 
-	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */
+	/* DO NOT USE: Removed upstream. Kept to not break the KMI. */
+	struct user_fpsimd_state *host_fpsimd_state;
 
 	struct {
 		/* {Break,watch}point registers */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 5f16c6f800a6..b0ed038ff4e9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -122,7 +122,7 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
 struct user_fpsimd_state *get_host_fpsimd_state(struct kvm_vcpu *vcpu);
 struct kvm_host_sve_state *get_host_sve_state(struct kvm_vcpu *vcpu);
 #else
-#define get_host_fpsimd_state(vcpu) (vcpu)->arch.host_fpsimd_state
+#define get_host_fpsimd_state(vcpu) NULL
 #define get_host_sve_state(vcpu) NULL
 #endif
 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 43bec87ee74d..552434b96595 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -37,8 +37,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
-	vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-
 	return 0;
 }
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 087a3cef4116..221d2b72a513 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -175,7 +175,7 @@ static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
 	 * Protected kvm restores the host's sve state as not to reveal that
 	 * fpsimd was used by a guest nor leak upper sve bits.
 	 */
-	if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+	if (system_supports_sve()) {
 		struct kvm_host_sve_state *sve_state = get_host_sve_state(vcpu);
 
 		sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
@@ -243,7 +243,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	isb();
 
 	/* Write out the host state if it's in the registers */
-	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
+	if (is_protected_kvm_enabled() && vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
 		kvm_hyp_handle_fpsimd_host(vcpu);
 
 	/* Restore the guest state */
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index b3acb60a2d6c..85227166252a 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -54,9 +54,6 @@ static void *__get_host_fpsimd_bytes(void)
 
 struct user_fpsimd_state *get_host_fpsimd_state(struct kvm_vcpu *vcpu)
 {
-	if (likely(!is_protected_kvm_enabled()))
-		return vcpu->arch.host_fpsimd_state;
-
 	WARN_ON(system_supports_sve());
 	return __get_host_fpsimd_bytes();
 }

From c952e23cf8dfc18357b9e2300b09e75807d13ef0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:21 +0000
Subject: [PATCH 36/44] BACKPORT: KVM: arm64: Remove VHE host restore of
 CPACR_EL1.ZEN

[ Upstream commit 459f059be702056d91537b99a129994aa6ccdd35 ]

When KVM is in VHE mode, the host kernel tries to save and restore the
configuration of CPACR_EL1.ZEN (i.e. CPTR_EL2.ZEN when HCR_EL2.E2H=1)
across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the
configuration may be clobbered by hyp when running a vCPU. This logic is
currently redundant.

The VHE hyp code unconditionally configures CPTR_EL2.ZEN to 0b01 when
returning to the host, permitting host kernel usage of SVE.

Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME
state, there's no need to save/restore the state of the EL0 SVE trap.
The kernel can safely save/restore state without trapping, as described
above, and will restore userspace state (including trap controls) before
returning to userspace.

Remove the redundant logic.

Bug: 411040189
Change-Id: I43bf5587223aae54caf9389eb3de17f155043d96
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-4-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
[Rework for refactoring of where the flags are stored -- broonie]
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/kvm_host.h |  2 --
 arch/arm64/kvm/fpsimd.c           | 16 ----------------
 2 files changed, 18 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b894dc38e205..c414593343bf 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -693,8 +693,6 @@ struct kvm_vcpu_arch {
 /* pKVM host vcpu state is dirty, needs resync */
 #define PKVM_HOST_STATE_DIRTY	__vcpu_single_flag(iflags, BIT(7))
 
-/* SVE enabled for host EL0 */
-#define HOST_SVE_ENABLED	__vcpu_single_flag(sflags, BIT(0))
 /* SME enabled for EL0 */
 #define HOST_SME_ENABLED	__vcpu_single_flag(sflags, BIT(1))
 /* Physical CPU not in supported_cpus */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 552434b96595..b4b3b9031543 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -65,10 +65,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	fpsimd_save_and_flush_cpu_state();
 	vcpu->arch.fp_state = FP_STATE_FREE;
 
-	vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
-	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
-		vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
-
 	/*
 	 * We don't currently support SME guests but if we leave
 	 * things in streaming mode then when the guest starts running
@@ -174,18 +170,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 		}
 
 		fpsimd_save_and_flush_cpu_state();
-	} else if (has_vhe() && system_supports_sve()) {
-		/*
-		 * The FPSIMD/SVE state in the CPU has not been touched, and we
-		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
-		 * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
-		 * for EL0.  To avoid spurious traps, restore the trap state
-		 * seen by kvm_arch_vcpu_load_fp():
-		 */
-		if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
-			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
-		else
-			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
 	}
 
 	update_thread_flag(TIF_SVE, 0);

From c00c44bea22d5c0ecc2f9663a461e774b3d90eae Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:22 +0000
Subject: [PATCH 37/44] BACKPORT: KVM: arm64: Remove VHE host restore of
 CPACR_EL1.SMEN

[ Upstream commit 407a99c4654e8ea65393f412c421a55cac539f5b ]

When KVM is in VHE mode, the host kernel tries to save and restore the
configuration of CPACR_EL1.SMEN (i.e. CPTR_EL2.SMEN when HCR_EL2.E2H=1)
across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the
configuration may be clobbered by hyp when running a vCPU. This logic
has historically been broken, and is currently redundant.

This logic was originally introduced in commit:

  861262ab86270206 ("KVM: arm64: Handle SME host state when running guests")

At the time, the VHE hyp code would reset CPTR_EL2.SMEN to 0b00 when
returning to the host, trapping host access to SME state. Unfortunately,
this was unsafe as the host could take a softirq before calling
kvm_arch_vcpu_put_fp(), and if a softirq handler were to use kernel mode
NEON the resulting attempt to save the live FPSIMD/SVE/SME state would
result in a fatal trap.

That issue was limited to VHE mode. For nVHE/hVHE modes, KVM always
saved/restored the host kernel's CPACR_EL1 value, and configured
CPTR_EL2.TSM to 0b0, ensuring that host usage of SME would not be
trapped.

The issue above was incidentally fixed by commit:

  375110ab51dec5dc ("KVM: arm64: Fix resetting SME trap values on reset for (h)VHE")

That commit changed the VHE hyp code to configure CPTR_EL2.SMEN to 0b01
when returning to the host, permitting host kernel usage of SME,
avoiding the issue described above. At the time, this was not identified
as a fix for commit 861262ab86270206.

Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME
state, there's no need to save/restore the state of the EL0 SME trap.
The kernel can safely save/restore state without trapping, as described
above, and will restore userspace state (including trap controls) before
returning to userspace.

Remove the redundant logic.

Bug: 411040189
Change-Id: Ia2fbb22a21da8e63f0a3b9a76d47ee2c987e2fa5
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-5-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
[Update for rework of flags storage -- broonie]
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/include/asm/kvm_host.h |  2 --
 arch/arm64/kvm/fpsimd.c           | 31 -------------------------------
 2 files changed, 33 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c414593343bf..c5b392b18401 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -693,8 +693,6 @@ struct kvm_vcpu_arch {
 /* pKVM host vcpu state is dirty, needs resync */
 #define PKVM_HOST_STATE_DIRTY	__vcpu_single_flag(iflags, BIT(7))
 
-/* SME enabled for EL0 */
-#define HOST_SME_ENABLED	__vcpu_single_flag(sflags, BIT(1))
 /* Physical CPU not in supported_cpus */
 #define ON_UNSUPPORTED_CPU	__vcpu_single_flag(sflags, BIT(2))
 /* WFIT instruction trapped */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index b4b3b9031543..5e7078d44713 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -64,21 +64,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	 */
 	fpsimd_save_and_flush_cpu_state();
 	vcpu->arch.fp_state = FP_STATE_FREE;
-
-	/*
-	 * We don't currently support SME guests but if we leave
-	 * things in streaming mode then when the guest starts running
-	 * FPSIMD or SVE code it may generate SME traps so as a
-	 * special case if we are in streaming mode we force the host
-	 * state to be saved now and exit streaming mode so that we
-	 * don't have to handle any SME traps for valid guest
-	 * operations. Do this for ZA as well for now for simplicity.
-	 */
-	if (system_supports_sme()) {
-		vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
-		if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
-			vcpu_set_flag(vcpu, HOST_SME_ENABLED);
-	}
 }
 
 /*
@@ -132,22 +117,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 
 	local_irq_save(flags);
 
-	/*
-	 * If we have VHE then the Hyp code will reset CPACR_EL1 to
-	 * CPACR_EL1_DEFAULT and we need to reenable SME.
-	 */
-	if (has_vhe() && system_supports_sme()) {
-		/* Also restore EL0 state seen on entry */
-		if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
-			sysreg_clear_set(CPACR_EL1, 0,
-					 CPACR_EL1_SMEN_EL0EN |
-					 CPACR_EL1_SMEN_EL1EN);
-		else
-			sysreg_clear_set(CPACR_EL1,
-					 CPACR_EL1_SMEN_EL0EN,
-					 CPACR_EL1_SMEN_EL1EN);
-	}
-
 	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
 		if (vcpu_has_sve(vcpu)) {
 			__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);

From b9b8d84f6cfc459941e2dd32eed7b92e3a3af6a9 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:24 +0000
Subject: [PATCH 38/44] BACKPORT: KVM: arm64: Refactor exit handlers

[ Upstream commit 9b66195063c5a145843547b1d692bd189be85287 ]

The hyp exit handling logic is largely shared between VHE and nVHE/hVHE,
with common logic in arch/arm64/kvm/hyp/include/hyp/switch.h. The code
in the header depends on function definitions provided by
arch/arm64/kvm/hyp/vhe/switch.c and arch/arm64/kvm/hyp/nvhe/switch.c
when they include the header.

This is an unusual header dependency, and prevents the use of
arch/arm64/kvm/hyp/include/hyp/switch.h in other files as this would
result in compiler warnings regarding missing definitions, e.g.

| In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8:
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:733:31: warning: 'kvm_get_exit_handler_array' used but never defined
|   733 | static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
|       |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:735:13: warning: 'early_exit_filter' used but never defined
|   735 | static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
|       |             ^~~~~~~~~~~~~~~~~

Refactor the logic such that the header doesn't depend on anything from
the C files. There should be no functional change as a result of this
patch.

Bug: 411040189
Change-Id: I4e58bad80763afd73fd03f9653ed4e66dfe97255
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-7-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 30 +++++-------------------
 arch/arm64/kvm/hyp/nvhe/switch.c        | 31 ++++++++++++++-----------
 arch/arm64/kvm/hyp/vhe/switch.c         |  8 +++----
 3 files changed, 27 insertions(+), 42 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 221d2b72a513..182983c308b8 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -437,23 +437,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
 
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
-
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
-
 /*
  * Allow the hypervisor to handle the exit with an exit handler if it has one.
  *
  * Returns true if the hypervisor handled the exit, and control should go back
  * to the guest, or false if it hasn't.
  */
-static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+				       const exit_handler_fn *handlers)
 {
-	const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
-	exit_handler_fn fn;
-
-	fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
-
+	exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
 	if (fn)
 		return fn(vcpu, exit_code);
 
@@ -483,20 +476,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code
  * the guest, false when we should restore the host state and return to the
  * main run loop.
  */
-static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+				      const exit_handler_fn *handlers)
 {
-	/*
-	 * Save PSTATE early so that we can evaluate the vcpu mode
-	 * early on.
-	 */
-	synchronize_vcpu_pstate(vcpu, exit_code);
-
-	/*
-	 * Check whether we want to repaint the state one way or
-	 * another.
-	 */
-	early_exit_filter(vcpu, exit_code);
-
 	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
 		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
@@ -526,7 +508,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 		goto exit;
 
 	/* Check if there's an exit handler and allow it to handle the exit. */
-	if (kvm_hyp_handle_exit(vcpu, exit_code))
+	if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
 		goto guest;
 exit:
 	/* Return to the host kernel and handle the exit */
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f1890454628d..cf7c3be0e620 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -211,20 +211,23 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
 	return hyp_exit_handlers;
 }
 
-/*
- * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
- * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
- * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
- * hypervisor spots a guest in such a state ensure it is handled, and don't
- * trust the host to spot or fix it.  The check below is based on the one in
- * kvm_arch_vcpu_ioctl_run().
- *
- * Returns false if the guest ran in AArch32 when it shouldn't have, and
- * thus should exit to the host, or true if a the guest run loop can continue.
- */
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-	if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
+	const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+
+	synchronize_vcpu_pstate(vcpu, exit_code);
+
+	/*
+	 * Some guests (e.g., protected VMs) are not allowed to run in
+	 * AArch32.  The ARMv8 architecture does not give the hypervisor a
+	 * mechanism to prevent a guest from dropping to AArch32 EL0 if
+	 * implemented by the CPU. If the hypervisor spots a guest in such a
+	 * state ensure it is handled, and don't trust the host to spot or fix
+	 * it.  The check below is based on the one in
+	 * kvm_arch_vcpu_ioctl_run().
+	 */
+	if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
 		/*
 		 * As we have caught the guest red-handed, decide that it isn't
 		 * fit for purpose anymore by making the vcpu invalid. The VMM
@@ -236,6 +239,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
 		*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
 		*exit_code |= ARM_EXCEPTION_IL;
 	}
+
+	return __fixup_guest_exit(vcpu, exit_code, handlers);
 }
 
 /* Switch to the guest for legacy non-VHE systems */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 45ac4a59cc2c..f24569ac26c2 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -114,13 +114,11 @@ static const exit_handler_fn hyp_exit_handlers[] = {
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };
 
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-	return hyp_exit_handlers;
-}
+	synchronize_vcpu_pstate(vcpu, exit_code);
 
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
+	return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
 }
 
 /* Switch to the guest for VHE systems running in EL2 */

From 89720e9e1bc3138967c5b89e1bb2a6fe40602104 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:25 +0000
Subject: [PATCH 39/44] BACKPORT: KVM: arm64: Mark some header functions as
 inline

[ Upstream commit f9dd00de1e53a47763dfad601635d18542c3836d ]

The shared hyp switch header has a number of static functions which
might not be used by all files that include the header, and when unused
they will provoke compiler warnings, e.g.

| In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8:
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:703:13: warning: 'kvm_hyp_handle_dabt_low' defined but not used [-Wunused-function]
|   703 | static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|       |             ^~~~~~~~~~~~~~~~~~~~~~~
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:682:13: warning: 'kvm_hyp_handle_cp15_32' defined but not used [-Wunused-function]
|   682 | static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
|       |             ^~~~~~~~~~~~~~~~~~~~~~
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:662:13: warning: 'kvm_hyp_handle_sysreg' defined but not used [-Wunused-function]
|   662 | static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
|       |             ^~~~~~~~~~~~~~~~~~~~~
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:458:13: warning: 'kvm_hyp_handle_fpsimd' defined but not used [-Wunused-function]
|   458 | static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|       |             ^~~~~~~~~~~~~~~~~~~~~
| ./arch/arm64/kvm/hyp/include/hyp/switch.h:329:13: warning: 'kvm_hyp_handle_mops' defined but not used [-Wunused-function]
|   329 | static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
|       |             ^~~~~~~~~~~~~~~~~~~

Mark these functions as 'inline' to suppress this warning. This
shouldn't result in any functional change.

At the same time, avoid the use of __alias() in the header and alias
kvm_hyp_handle_iabt_low() and kvm_hyp_handle_watchpt_low() to
kvm_hyp_handle_memory_fault() using CPP, matching the style in the rest
of the kernel. For consistency, kvm_hyp_handle_memory_fault() is also
marked as 'inline'.

Bug: 411040189
Change-Id: I5766401542afda440f737c1fee1810a73e89e86d
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-8-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 182983c308b8..c1d4a74854e2 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -221,7 +221,7 @@ static void __deactivate_fpsimd_traps(struct kvm_vcpu *vcpu)
  * If FP/SIMD is not implemented, handle the trap and inject an undefined
  * instruction exception to the guest. Similarly for trapped SVE accesses.
  */
-static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	bool sve_guest;
 	u8 esr_ec;
@@ -370,7 +370,7 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return true;
 }
 
-static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
 	    handle_tx2_tvm(vcpu))
@@ -386,7 +386,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return false;
 }
 
-static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
 	    __vgic_v3_perform_cpuif_access(vcpu) == 1)
@@ -395,19 +395,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return false;
 }
 
-static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
+					       u64 *exit_code)
 {
 	if (!__populate_fault_info(vcpu))
 		return true;
 
 	return false;
 }
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
-	__alias(kvm_hyp_handle_memory_fault);
-static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
-	__alias(kvm_hyp_handle_memory_fault);
+#define kvm_hyp_handle_iabt_low		kvm_hyp_handle_memory_fault
+#define kvm_hyp_handle_watchpt_low	kvm_hyp_handle_memory_fault
 
-static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
 		return true;

From 6a31e426c64ea7be25090feb683be9964ddea94f Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 16 Dec 2024 10:50:52 +0000
Subject: [PATCH 40/44] BACKPORT: KVM: arm64: Calculate cptr_el2 traps on
 activating traps

[ Upstream commit 2fd5b4b0e7b440602455b79977bfa64dea101e6c ]

Similar to VHE, calculate the value of cptr_el2 from scratch on
activate traps. This removes the need to store cptr_el2 in every
vcpu structure. Moreover, some traps, such as whether the guest
owns the fp registers, need to be set on every vcpu run.

[tabba@ Kept cptr_el2 as to not break the KMI.]

Bug: 411040189
Reported-by: James Clark <james.clark@linaro.org>
Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch")
Change-Id: Iba65e9bb65d8498007423dc5b137dedc602359de
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20241216105057.579031-13-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/kvm_host.h |  2 ++
 arch/arm64/kvm/arm.c              |  1 -
 arch/arm64/kvm/hyp/nvhe/pkvm.c    | 14 ------------
 arch/arm64/kvm/hyp/nvhe/switch.c  | 38 ++++++++++++++++++++++---------
 4 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c5b392b18401..3fc67e4b1b60 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -438,6 +438,8 @@ struct kvm_vcpu_arch {
 	/* Values of trap registers for the guest. */
 	u64 hcr_el2;
 	u64 mdcr_el2;
+
+	/* DO NOT USE: Removed upstream. Kept to not break the KMI. */
 	u64 cptr_el2;
 
 	/* Values of trap registers for the host before guest entry. */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 915100a97191..4386e156c019 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1357,7 +1357,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	}
 
 	vcpu_reset_hcr(vcpu);
-	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
 
 	/*
 	 * Handle the "start in power-off" case.
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 85227166252a..ba09c7de6901 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -73,7 +73,6 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
 	u64 hcr_set = HCR_RW;
 	u64 hcr_clear = 0;
-	u64 cptr_set = 0;
 
 	/* Protected KVM does not support AArch32 guests. */
 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
@@ -100,16 +99,10 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
 	/* Trap AMU */
 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
 		hcr_clear |= HCR_AMVOFFEN;
-		cptr_set |= CPTR_EL2_TAM;
 	}
 
-	/* Trap SVE */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
-		cptr_set |= CPTR_EL2_TZ;
-
 	vcpu->arch.hcr_el2 |= hcr_set;
 	vcpu->arch.hcr_el2 &= ~hcr_clear;
-	vcpu->arch.cptr_el2 |= cptr_set;
 }
 
 /*
@@ -139,7 +132,6 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
 	u64 mdcr_set = 0;
 	u64 mdcr_clear = 0;
-	u64 cptr_set = 0;
 
 	/* Trap/constrain PMU */
 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
@@ -166,13 +158,8 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
 		mdcr_set |= MDCR_EL2_TTRF;
 
-	/* Trap Trace */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
-		cptr_set |= CPTR_EL2_TTA;
-
 	vcpu->arch.mdcr_el2 |= mdcr_set;
 	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
-	vcpu->arch.cptr_el2 |= cptr_set;
 }
 
 /*
@@ -237,7 +224,6 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
  */
 static void pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
 {
-	hyp_vcpu->vcpu.arch.cptr_el2 = CPTR_EL2_DEFAULT;
 	hyp_vcpu->vcpu.arch.mdcr_el2 = 0;
 
 	if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index cf7c3be0e620..1fc46f7a99f8 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -36,23 +36,39 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
 
 extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
 
-static void __activate_traps(struct kvm_vcpu *vcpu)
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
 {
-	u64 val;
+	u64 val = CPTR_EL2_TAM;	/* Same bit irrespective of E2H */
 
-	___activate_traps(vcpu);
-	__activate_traps_common(vcpu);
+	/* !hVHE case upstream */
+	if (1) {
+		val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
 
-	val = vcpu->arch.cptr_el2;
-	val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
-	if (!guest_owns_fp_regs(vcpu)) {
-		val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
-		__activate_traps_fpsimd32(vcpu);
-	}
-	if (cpus_have_final_cap(ARM64_SME))
+		/*
+		 * Always trap SME since it's not supported in KVM.
+		 * TSM is RES1 if SME isn't implemented.
+		 */
 		val |= CPTR_EL2_TSM;
 
+		if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs(vcpu))
+			val |= CPTR_EL2_TZ;
+
+		if (!guest_owns_fp_regs(vcpu))
+			val |= CPTR_EL2_TFP;
+	}
+
+	if (!guest_owns_fp_regs(vcpu))
+		__activate_traps_fpsimd32(vcpu);
+
 	write_sysreg(val, cptr_el2);
+}
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+	___activate_traps(vcpu);
+	__activate_traps_common(vcpu);
+	__activate_cptr_traps(vcpu);
+
 	write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
 
 	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {

From 785e577258509dc0a11b4eff53a85ef73ff88bf2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 10 Feb 2025 19:52:26 +0000
Subject: [PATCH 41/44] BACKPORT: KVM: arm64: Eagerly switch ZCR_EL{1,2}

[ Upstream commit 59419f10045bc955d2229819c7cf7a8b0b9c5b59 ]

In non-protected KVM modes, while the guest FPSIMD/SVE/SME state is live on the
CPU, the host's active SVE VL may differ from the guest's maximum SVE VL:

* For VHE hosts, when a VM uses NV, ZCR_EL2 contains a value constrained
  by the guest hypervisor, which may be less than or equal to that
  guest's maximum VL.

  Note: in this case the value of ZCR_EL1 is immaterial due to E2H.

* For nVHE/hVHE hosts, ZCR_EL1 contains a value written by the guest,
  which may be less than or greater than the guest's maximum VL.

  Note: in this case hyp code traps host SVE usage and lazily restores
  ZCR_EL2 to the host's maximum VL, which may be greater than the
  guest's maximum VL.

This can be the case between exiting a guest and kvm_arch_vcpu_put_fp().
If a softirq is taken during this period and the softirq handler tries
to use kernel-mode NEON, then the kernel will fail to save the guest's
FPSIMD/SVE state, and will pend a SIGKILL for the current thread.

This happens because kvm_arch_vcpu_ctxsync_fp() binds the guest's live
FPSIMD/SVE state with the guest's maximum SVE VL, and
fpsimd_save_user_state() verifies that the live SVE VL is as expected
before attempting to save the register state:

| if (WARN_ON(sve_get_vl() != vl)) {
|         force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
|         return;
| }

Fix this and make this a bit easier to reason about by always eagerly
switching ZCR_EL{1,2} at hyp during guest<->host transitions. With this
happening, there's no need to trap host SVE usage, and the nVHE/hVHE
__deactivate_cptr_traps() logic can be simplified to enable host access
to all present FPSIMD/SVE/SME features.

In protected nVHE/hVHE modes, the host's state is always saved/restored
by hyp, and the guest's state is saved prior to exit to the host, so
from the host's PoV the guest never has live FPSIMD/SVE/SME state, and
the host's ZCR_EL1 is never clobbered by hyp.

Bug: 411040189
Change-Id: Ifecd5024230fadd0b73755587950ba651b94dae0
Fixes: 8c8010d69c132273 ("KVM: arm64: Save/restore SVE state for nVHE")
Fixes: 2e3cf82063a00ea0 ("KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250210195226.1215254-9-mark.rutland@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
[ v6.6 lacks pKVM saving of host SVE state, pull in discovery of maximum
  host VL separately -- broonie ]
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/fpsimd.c                 | 30 +++++---------
 arch/arm64/kvm/hyp/entry.S              |  5 +++
 arch/arm64/kvm/hyp/include/hyp/switch.h | 55 +++++++++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      |  9 ++--
 arch/arm64/kvm/hyp/nvhe/switch.c        | 30 +++++++++-----
 arch/arm64/kvm/hyp/vhe/switch.c         |  4 ++
 arch/arm64/kvm/reset.c                  |  1 +
 7 files changed, 97 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 5e7078d44713..ad716eadc4c8 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -118,26 +118,16 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	local_irq_save(flags);
 
 	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
-		if (vcpu_has_sve(vcpu)) {
-			__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
-
-			/*
-			 * Restore the VL that was saved when bound to the CPU,
-			 * which is the maximum VL for the guest. Because
-			 * the layout of the data when saving the sve state
-			 * depends on the VL, we need to use a consistent VL.
-			 * Note that this means that at guest exit ZCR_EL1 is
-			 * not necessarily the same as on guest entry.
-			 *
-			 * Flushing the cpu state sets the TIF_FOREIGN_FPSTATE
-			 * bit for the context, which lets the kernel restore
-			 * the sve state, including ZCR_EL1 later.
-			 */
-			if (!has_vhe())
-				sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
-						       SYS_ZCR_EL1);
-		}
-
+		/*
+		 * Flush (save and invalidate) the fpsimd/sve state so that if
+		 * the host tries to use fpsimd/sve, it's not using stale data
+		 * from the guest.
+		 *
+		 * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the
+		 * context unconditionally, in both nVHE and VHE. This allows
+		 * the kernel to restore the fpsimd/sve state, including ZCR_EL1
+		 * when needed.
+		 */
 		fpsimd_save_and_flush_cpu_state();
 	}
 
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 435346ea1504..d8c94c45cb2f 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
 alternative_else_nop_endif
 	mrs	x1, isr_el1
 	cbz	x1,  1f
+
+	// Ensure that __guest_enter() always provides a context
+	// synchronization event so that callers don't need ISBs for anything
+	// that would usually be synchronized by the ERET.
+	isb
 	mov	x0, #ARM_EXCEPTION_IRQ
 	ret
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index c1d4a74854e2..33f6af14ba3b 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -168,6 +168,61 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }
 
+static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	u64 zcr_el1, zcr_el2;
+
+	if (!guest_owns_fp_regs(vcpu))
+		return;
+
+	if (vcpu_has_sve(vcpu)) {
+		zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+
+		write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+		zcr_el1 = __vcpu_sys_reg(vcpu, ZCR_EL1);
+		write_sysreg_el1(zcr_el1, SYS_ZCR);
+	}
+}
+
+static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
+{
+	u64 zcr_el1, zcr_el2;
+
+	if (!guest_owns_fp_regs(vcpu))
+		return;
+
+	/*
+	 * When the guest owns the FP regs, we know that guest+hyp traps for
+	 * any FPSIMD/SVE/SME features exposed to the guest have been disabled
+	 * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
+	 * prior to __guest_enter(). As __guest_enter() guarantees a context
+	 * synchronization event, we don't need an ISB here to avoid taking
+	 * traps for anything that was exposed to the guest.
+	 */
+	if (vcpu_has_sve(vcpu)) {
+		zcr_el1 = read_sysreg_el1(SYS_ZCR);
+		__vcpu_sys_reg(vcpu, ZCR_EL1) = zcr_el1;
+
+		/*
+		 * The guest's state is always saved using the guest's max VL.
+		 * Ensure that the host has the guest's max VL active such that
+		 * the host can save the guest's state lazily, but don't
+		 * artificially restrict the host to the guest's max VL.
+		 */
+		if (has_vhe()) {
+			zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+			write_sysreg_el2(zcr_el2, SYS_ZCR);
+		} else {
+			zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
+			write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+			zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
+			write_sysreg_el1(zcr_el1, SYS_ZCR);
+		}
+	}
+}
+
 static void kvm_hyp_handle_fpsimd_host(struct kvm_vcpu *vcpu)
 {
 	/*
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 09bd468dfadb..1b5fdbfa6de8 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -7,6 +7,7 @@
 #include <kvm/arm_hypercalls.h>
 
 #include <hyp/adjust_pc.h>
+#include <hyp/switch.h>
 
 #include <asm/pgtable-types.h>
 #include <asm/kvm_asm.h>
@@ -847,7 +848,9 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 		sync_hyp_vcpu(hyp_vcpu, ret);
 	} else {
 		/* The host is fully trusted, run its vCPU directly. */
+		fpsimd_lazy_switch_to_guest(kern_hyp_va(host_vcpu));
 		ret = __kvm_vcpu_run(host_vcpu);
+		fpsimd_lazy_switch_to_host(kern_hyp_va(host_vcpu));
 	}
 out:
 	cpu_reg(host_ctxt, 1) =  ret;
@@ -1395,12 +1398,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
 	case ESR_ELx_EC_SMC64:
 		handle_host_smc(host_ctxt);
 		break;
-	case ESR_ELx_EC_SVE:
-		BUG_ON(is_protected_kvm_enabled());
-		sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
-		isb();
-		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
-		break;
 	case ESR_ELx_EC_IABT_LOW:
 	case ESR_ELx_EC_DABT_LOW:
 		handle_host_mem_abort(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 1fc46f7a99f8..a496258d42e6 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -40,6 +40,9 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
 {
 	u64 val = CPTR_EL2_TAM;	/* Same bit irrespective of E2H */
 
+	if (!guest_owns_fp_regs(vcpu))
+		__activate_traps_fpsimd32(vcpu);
+
 	/* !hVHE case upstream */
 	if (1) {
 		val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
@@ -55,12 +58,24 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
 
 		if (!guest_owns_fp_regs(vcpu))
 			val |= CPTR_EL2_TFP;
+
+		write_sysreg(val, cptr_el2);
 	}
+}
 
-	if (!guest_owns_fp_regs(vcpu))
-		__activate_traps_fpsimd32(vcpu);
+static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+	/* !hVHE case upstream */
+	if (1) {
+		u64 val = CPTR_NVHE_EL2_RES1;
 
-	write_sysreg(val, cptr_el2);
+		if (!cpus_have_final_cap(ARM64_SVE))
+			val |= CPTR_EL2_TZ;
+		if (!cpus_have_final_cap(ARM64_SME))
+			val |= CPTR_EL2_TSM;
+
+		write_sysreg(val, cptr_el2);
+	}
 }
 
 static void __activate_traps(struct kvm_vcpu *vcpu)
@@ -89,7 +104,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 static void __deactivate_traps(struct kvm_vcpu *vcpu)
 {
 	extern char __kvm_hyp_host_vector[];
-	u64 cptr;
 
 	___deactivate_traps(vcpu);
 
@@ -114,13 +128,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
 	write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
 
-	cptr = CPTR_EL2_DEFAULT;
-	if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
-		cptr |= CPTR_EL2_TZ;
-	if (cpus_have_final_cap(ARM64_SME))
-		cptr &= ~CPTR_EL2_TSM;
-
-	write_sysreg(cptr, cptr_el2);
+	__deactivate_cptr_traps(vcpu);
 	write_sysreg(__kvm_hyp_host_vector, vbar_el2);
 }
 
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index f24569ac26c2..179152bb9e42 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -134,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
 	sysreg_save_host_state_vhe(host_ctxt);
 
+	fpsimd_lazy_switch_to_guest(vcpu);
+
 	/*
 	 * ARM erratum 1165522 requires us to configure both stage 1 and
 	 * stage 2 translation for the guest context before we clear
@@ -164,6 +166,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
 	__deactivate_traps(vcpu);
 
+	fpsimd_lazy_switch_to_host(vcpu);
+
 	sysreg_restore_host_state_vhe(host_ctxt);
 
 	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 4b80f4e2b438..324b3338ab6b 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -40,6 +40,7 @@ int kvm_arm_init_sve(void)
 	if (system_supports_sve()) {
 		kvm_sve_max_vl = sve_max_virtualisable_vl();
 		kvm_host_sve_max_vl = sve_max_vl();
+		kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
 
 		/*
 		 * The get_sve_reg()/set_sve_reg() ioctl interface will need

From 5b71d364254c468f68c6db5f449f8fcef42cb425 Mon Sep 17 00:00:00 2001
From: zhanghao56 <zhanghao56@xiaomi.corp-partner.google.com>
Date: Tue, 13 May 2025 16:23:13 +0800
Subject: [PATCH 42/44] ANDROID: binder: fix minimum node priority comparison

The "desired" priority for a transaction can be adjusted depending on
various factors. For instance, it might be set to SCHED_NORMAL 120, when
the caller is RT and the target node has !inherit_rt.

However, instead of using these adjustments, the existing logic compares
the minimum node priority against the original transaction priority.
If the transaction priority is "higher", then the minimum node priority
is ignored. This is particularly a problem when the "desired" priority
has been changed to SCHED_NORMAL.

This patch corrects the logic, comparing the minimum node priority
against the (potentially adjusted) "desired" priority. This guarantees
that the node's minimum priority is honored.

Bug: 417382411
Cc: Martijn Coenen <maco@google.com>
Fixes: c46810c23565 ("ANDROID: binder: add RT inheritance flag to node.")
Change-Id: I813073241b996c1c38c29f20849b247023697102
Signed-off-by: zhanghao56 <zhanghao56@xiaomi.corp-partner.google.com>
Signed-off-by: Carlos Llamas <cmllamas@google.com>
---
 drivers/android/binder.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 7811ced3ac50..877a7e357c8b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -842,8 +842,8 @@ static void binder_transaction_priority(struct binder_thread *thread,
 		desired.sched_policy = SCHED_NORMAL;
 	}
 
-	if (node_prio.prio < t->priority.prio ||
-	    (node_prio.prio == t->priority.prio &&
+	if (node_prio.prio < desired.prio ||
+	    (node_prio.prio == desired.prio &&
 	     node_prio.sched_policy == SCHED_FIFO)) {
 		/*
 		 * In case the minimum priority on the node is

From ed6999107ec6d6a8696b201f5cc71217e2f62a87 Mon Sep 17 00:00:00 2001
From: Chungkai Mei <chungkai@google.com>
Date: Mon, 19 May 2025 03:12:34 +0000
Subject: [PATCH 43/44] ANDROID: vendor_hook: add
 trace_android_rvh_setscheduler_prio

To modify the priority of specific tasks, add a vendor hook in
__setscheduler_prio().

Bug: 409176857
Change-Id: Id5a2309378f1a8c3ecc1de71c20f44f73b3f7557
Signed-off-by: Chungkai Mei <chungkai@google.com>
---
 include/trace/hooks/sched.h | 4 ++++
 kernel/sched/core.c         | 1 +
 kernel/sched/vendor_hooks.c | 1 +
 3 files changed, 6 insertions(+)

diff --git a/include/trace/hooks/sched.h b/include/trace/hooks/sched.h
index ce26b58b9e3c..af49cae410e6 100644
--- a/include/trace/hooks/sched.h
+++ b/include/trace/hooks/sched.h
@@ -84,6 +84,10 @@ DECLARE_RESTRICTED_HOOK(android_rvh_setscheduler,
 	TP_PROTO(struct task_struct *p),
 	TP_ARGS(p), 1);
 
+DECLARE_RESTRICTED_HOOK(android_rvh_setscheduler_prio,
+	TP_PROTO(struct task_struct *p),
+	TP_ARGS(p), 1);
+
 struct sched_group;
 DECLARE_RESTRICTED_HOOK(android_rvh_find_busiest_group,
 	TP_PROTO(struct sched_group *busiest, struct rq *dst_rq, int *out_balance),
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 64a231649cce..2174dd3ffb2a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7085,6 +7085,7 @@ static void __setscheduler_prio(struct task_struct *p, int prio)
 		p->sched_class = &fair_sched_class;
 
 	p->prio = prio;
+	trace_android_rvh_setscheduler_prio(p);
 }
 
 #ifdef CONFIG_RT_MUTEXES
diff --git a/kernel/sched/vendor_hooks.c b/kernel/sched/vendor_hooks.c
index 46a67d9b6344..89a4a4174cde 100644
--- a/kernel/sched/vendor_hooks.c
+++ b/kernel/sched/vendor_hooks.c
@@ -27,6 +27,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_rtmutex_prepare_setprio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_set_user_nice);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_set_user_nice_locked);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_setscheduler);
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_setscheduler_prio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_find_busiest_group);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_dump_throttled_rt_tasks);
 EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_jiffies_update);

From 3c6d0251e1fb722e884184d964421fe6f0586534 Mon Sep 17 00:00:00 2001
From: Chungkai Mei <chungkai@google.com>
Date: Tue, 20 May 2025 03:59:59 +0000
Subject: [PATCH 44/44] ANDROID: ABI: Update pixel symbol list

Adding the following symbols:
  - param_ops_ullong
  - __traceiter_android_rvh_setscheduler_prio
  - __tracepoint_android_rvh_setscheduler_prio
  - usb_gadget_connect
  - usb_gadget_disconnect

Bug: 409176857
Change-Id: I026c6a80ef4c31577bb2fc28b0b3d9e2e709a200
Signed-off-by: Chungkai Mei <chungkai@google.com>
---
 android/abi_gki_aarch64.stg   | 20 ++++++++++++++++++++
 android/abi_gki_aarch64_pixel |  9 +++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg
index 24898f7746c4..f149d8523964 100644
--- a/android/abi_gki_aarch64.stg
+++ b/android/abi_gki_aarch64.stg
@@ -347330,6 +347330,15 @@ elf_symbol {
   type_id: 0x9bdbdcc4
   full_name: "__traceiter_android_rvh_setscheduler"
 }
+elf_symbol {
+  id: 0x1228e7e9
+  name: "__traceiter_android_rvh_setscheduler_prio"
+  is_defined: true
+  symbol_type: FUNCTION
+  crc: 0x116cab3c
+  type_id: 0x9bdbdcc4
+  full_name: "__traceiter_android_rvh_setscheduler_prio"
+}
 elf_symbol {
   id: 0x73c83ef4
   name: "__traceiter_android_rvh_shmem_get_folio"
@@ -352451,6 +352460,15 @@ elf_symbol {
   type_id: 0x18ccbd2c
   full_name: "__tracepoint_android_rvh_setscheduler"
 }
+elf_symbol {
+  id: 0x8a4070f7
+  name: "__tracepoint_android_rvh_setscheduler_prio"
+  is_defined: true
+  symbol_type: OBJECT
+  crc: 0xa79bc306
+  type_id: 0x18ccbd2c
+  full_name: "__tracepoint_android_rvh_setscheduler_prio"
+}
 elf_symbol {
   id: 0x00b7ed82
   name: "__tracepoint_android_rvh_shmem_get_folio"
@@ -419162,6 +419180,7 @@ interface {
   symbol_id: 0x9b0cc890
   symbol_id: 0x559e0725
   symbol_id: 0xa01b20ce
+  symbol_id: 0x1228e7e9
   symbol_id: 0x73c83ef4
   symbol_id: 0x46515de8
   symbol_id: 0x955e6fc1
@@ -419731,6 +419750,7 @@ interface {
   symbol_id: 0x42fff08e
   symbol_id: 0x74f29f73
   symbol_id: 0xe48123a4
+  symbol_id: 0x8a4070f7
   symbol_id: 0x00b7ed82
   symbol_id: 0xe8cacf26
   symbol_id: 0xad588d93
diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel
index 5b8c7b54bde4..4678f40d85ef 100644
--- a/android/abi_gki_aarch64_pixel
+++ b/android/abi_gki_aarch64_pixel
@@ -1607,6 +1607,7 @@
   param_ops_long
   param_ops_string
   param_ops_uint
+  param_ops_ullong
   param_ops_ulong
   param_set_copystring
   param_set_int
@@ -2392,6 +2393,7 @@
   __traceiter_android_rvh_set_cpus_allowed_by_task
   __traceiter_android_rvh_set_iowait
   __traceiter_android_rvh_setscheduler
+  __traceiter_android_rvh_setscheduler_prio
   __traceiter_android_rvh_set_task_cpu
   __traceiter_android_rvh_set_user_nice
   __traceiter_android_rvh_set_user_nice_locked
@@ -2417,6 +2419,7 @@
   __traceiter_android_vh_binder_proc_transaction_finish
   __traceiter_android_vh_binder_restore_priority
   __traceiter_android_vh_binder_set_priority
+  __traceiter_android_vh_calculate_totalreserve_pages
   __traceiter_android_vh_cpu_idle_enter
   __traceiter_android_vh_cpu_idle_exit
   __traceiter_android_vh_dump_throttled_rt_tasks
@@ -2460,7 +2463,6 @@
   __traceiter_android_vh_usb_dev_resume
   __traceiter_android_vh_use_amu_fie
   __traceiter_android_vh_vmscan_kswapd_done
-  __traceiter_android_vh_calculate_totalreserve_pages
   __traceiter_clock_set_rate
   __traceiter_cma_alloc_finish
   __traceiter_cma_alloc_start
@@ -2533,6 +2535,7 @@
   __tracepoint_android_rvh_set_cpus_allowed_by_task
   __tracepoint_android_rvh_set_iowait
   __tracepoint_android_rvh_setscheduler
+  __tracepoint_android_rvh_setscheduler_prio
   __tracepoint_android_rvh_set_task_cpu
   __tracepoint_android_rvh_set_user_nice
   __tracepoint_android_rvh_set_user_nice_locked
@@ -2558,6 +2561,7 @@
   __tracepoint_android_vh_binder_proc_transaction_finish
   __tracepoint_android_vh_binder_restore_priority
   __tracepoint_android_vh_binder_set_priority
+  __tracepoint_android_vh_calculate_totalreserve_pages
   __tracepoint_android_vh_cpu_idle_enter
   __tracepoint_android_vh_cpu_idle_exit
   __tracepoint_android_vh_dump_throttled_rt_tasks
@@ -2601,7 +2605,6 @@
   __tracepoint_android_vh_usb_dev_resume
   __tracepoint_android_vh_use_amu_fie
   __tracepoint_android_vh_vmscan_kswapd_done
-  __tracepoint_android_vh_calculate_totalreserve_pages
   __tracepoint_clock_set_rate
   __tracepoint_cma_alloc_finish
   __tracepoint_cma_alloc_start
@@ -2737,7 +2740,9 @@
   usb_function_register
   usb_function_unregister
   usb_gadget_activate
+  usb_gadget_connect
   usb_gadget_deactivate
+  usb_gadget_disconnect
   usb_gadget_set_state
   usb_gstrings_attach
   usb_hcd_is_primary_hcd