From f56ddffe05c0ed9c95dad3accaff1be1cd0d6208 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:07 -0700 Subject: [PATCH 01/98] ANDROID: block: Use pr_info() instead of printk(KERN_INFO ...) Switch to the modern style of printing kernel messages. Use %u instead of %d to print unsigned integers. The pr_fmt() format is added on top of the file to include __func__ in the pr_info() calls. Bug: 308663717 Bug: 319125789 Change-Id: Iea0a19c8221fe1a2fcd3f26c5ffd0c3b69935eec Signed-off-by: Bart Van Assche [jyescas@google.com: define pr_fmt(fmt) to include __func__ in the output] Signed-off-by: Juan Yescas --- block/blk-settings.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 73a80895e3ae..9e757812ad02 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -2,6 +2,9 @@ /* * Functions related to setting various queue properties from drivers */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include #include #include @@ -127,8 +130,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto if ((max_hw_sectors << 9) < PAGE_SIZE) { max_hw_sectors = 1 << (PAGE_SHIFT - 9); - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_hw_sectors); + pr_info("set to minimum %u\n", max_hw_sectors); } max_hw_sectors = round_down(max_hw_sectors, @@ -243,8 +245,7 @@ void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments { if (!max_segments) { max_segments = 1; - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_segments); + pr_info("set to minimum %u\n", max_segments); } q->limits.max_segments = max_segments; @@ -280,8 +281,7 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { if (max_size < PAGE_SIZE) { max_size = PAGE_SIZE; - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_size); + pr_info("set to minimum %u\n", max_size); } /* see blk_queue_virt_boundary() for 
the explanation */ @@ -701,8 +701,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) { if (mask < PAGE_SIZE - 1) { mask = PAGE_SIZE - 1; - printk(KERN_INFO "%s: set to minimum %lx\n", - __func__, mask); + pr_info("set to minimum %lx\n", mask); } q->limits.seg_boundary_mask = mask; From 025c278e84315c1b1b7ecb99a2a419c7a949c0cc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:08 -0700 Subject: [PATCH 02/98] ANDROID: block: Prepare for supporting sub-page limits Introduce variables that represent the lower configuration bounds. This patch does not change any functionality. Bug: 308663717 Bug: 319125789 Change-Id: Ia39fbe29a0711caba841a7b44d462608216841ec Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- block/blk-settings.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 9e757812ad02..536fa40d1331 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -126,10 +126,11 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) { struct queue_limits *limits = &q->limits; + unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT; unsigned int max_sectors; - if ((max_hw_sectors << 9) < PAGE_SIZE) { - max_hw_sectors = 1 << (PAGE_SHIFT - 9); + if (max_hw_sectors < min_max_hw_sectors) { + max_hw_sectors = min_max_hw_sectors; pr_info("set to minimum %u\n", max_hw_sectors); } @@ -279,8 +280,10 @@ EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments); **/ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { - if (max_size < PAGE_SIZE) { - max_size = PAGE_SIZE; + unsigned int min_max_segment_size = PAGE_SIZE; + + if (max_size < min_max_segment_size) { + max_size = min_max_segment_size; pr_info("set to minimum %u\n", max_size); } From 3f6018f1b624c48f3a4d29dbe4ad1390901abaf7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: 
Mon, 12 Jun 2023 13:33:09 -0700 Subject: [PATCH 03/98] ANDROID: block: Support configuring limits below the page size Allow block drivers to configure the following: * Maximum number of hardware sectors values smaller than PAGE_SIZE >> SECTOR_SHIFT. For PAGE_SIZE = 4096 this means that values below 8 become supported. * A maximum segment size below the page size. This is most useful for page sizes above 4096 bytes. The blk_sub_page_segments static branch will be used in later patches to prevent that performance of block drivers that support segments >= PAGE_SIZE and max_hw_sectors >= PAGE_SIZE >> SECTOR_SHIFT would be affected. This patch may change the behavior of existing block drivers from not working into working. If a block driver calls blk_queue_max_hw_sectors() or blk_queue_max_segment_size(), this is usually done to configure the maximum supported limits. An attempt to configure a limit below what is supported by the block layer causes the block layer to select a larger value. If that value is not supported by the block driver, this may cause other data to be transferred than requested, a kernel crash or other undesirable behavior. Keeps the ABI stable by taking advantage of a hole in the structure! 
Bug: 308663717 Bug: 319125789 Bug: 324152549 Change-Id: I4a7b605f0f0d82dde0b4703496c7314064f48acb Signed-off-by: Bart Van Assche [jyescas@google.com: disable subpage limits in block/blk-sysfs.c instead block/blk-core.c because the function blk_free_queue() is not defined in 5.15 kernel] Signed-off-by: Juan Yescas --- android/abi_gki_aarch64.stg | 7 +++++ block/blk-settings.c | 60 +++++++++++++++++++++++++++++++++++++ block/blk-sysfs.c | 2 ++ block/blk.h | 9 ++++++ include/linux/blkdev.h | 5 ++++ 5 files changed, 83 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index d6a079dc565b..1ad3e32c0402 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -168781,6 +168781,12 @@ member { type_id: 0xa7c362b0 offset: 1088 } +member { + id: 0x4c1b044f + name: "sub_page_limits" + type_id: 0x6d7f5ff6 + offset: 840 +} member { id: 0xedd64f59 name: "sub_reg_offsets" @@ -227137,6 +227143,7 @@ struct_union { member_id: 0x06473753 member_id: 0x1bdd5453 member_id: 0x26582f94 + member_id: 0x4c1b044f member_id: 0xaf3e33dd member_id: 0x2d081f94 member_id: 0xd671ce1e diff --git a/block/blk-settings.c b/block/blk-settings.c index 536fa40d1331..e416616bfc5a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -21,6 +21,11 @@ #include "blk.h" #include "blk-wbt.h" +/* Protects blk_nr_sub_page_limit_queues and blk_sub_page_limits changes. 
*/ +static DEFINE_MUTEX(blk_sub_page_limit_lock); +static uint32_t blk_nr_sub_page_limit_queues; +DEFINE_STATIC_KEY_FALSE(blk_sub_page_limits); + void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) { q->rq_timeout = timeout; @@ -60,6 +65,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->misaligned = 0; lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; + lim->sub_page_limits = false; } EXPORT_SYMBOL(blk_set_default_limits); @@ -104,6 +110,50 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) } EXPORT_SYMBOL(blk_queue_bounce_limit); +/** + * blk_enable_sub_page_limits - enable support for limits below the page size + * @lim: request queue limits for which to enable support of these features. + * + * Enable support for max_segment_size values smaller than PAGE_SIZE and for + * max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT. Support for these + * features is not enabled all the time because of the runtime overhead of these + * features. + */ +static void blk_enable_sub_page_limits(struct queue_limits *lim) +{ + if (lim->sub_page_limits) + return; + + lim->sub_page_limits = true; + + mutex_lock(&blk_sub_page_limit_lock); + if (++blk_nr_sub_page_limit_queues == 1) + static_branch_enable(&blk_sub_page_limits); + mutex_unlock(&blk_sub_page_limit_lock); +} + +/** + * blk_disable_sub_page_limits - disable support for limits below the page size + * @lim: request queue limits for which to enable support of these features. + * + * max_segment_size values smaller than PAGE_SIZE and for max_hw_sectors values + * below PAGE_SIZE >> SECTOR_SHIFT. Support for these features is not enabled + * all the time because of the runtime overhead of these features. 
+ */ +void blk_disable_sub_page_limits(struct queue_limits *lim) +{ + if (!lim->sub_page_limits) + return; + + lim->sub_page_limits = false; + + mutex_lock(&blk_sub_page_limit_lock); + WARN_ON_ONCE(blk_nr_sub_page_limit_queues <= 0); + if (--blk_nr_sub_page_limit_queues == 0) + static_branch_disable(&blk_sub_page_limits); + mutex_unlock(&blk_sub_page_limit_lock); +} + /** * blk_queue_max_hw_sectors - set max sectors for a request for this queue * @q: the request queue for the device @@ -129,6 +179,11 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT; unsigned int max_sectors; + if (max_hw_sectors < min_max_hw_sectors) { + blk_enable_sub_page_limits(limits); + min_max_hw_sectors = 1; + } + if (max_hw_sectors < min_max_hw_sectors) { max_hw_sectors = min_max_hw_sectors; pr_info("set to minimum %u\n", max_hw_sectors); @@ -282,6 +337,11 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { unsigned int min_max_segment_size = PAGE_SIZE; + if (max_size < min_max_segment_size) { + blk_enable_sub_page_limits(&q->limits); + min_max_segment_size = SECTOR_SIZE; + } + if (max_size < min_max_segment_size) { max_size = min_max_segment_size; pr_info("set to minimum %u\n", max_size); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5a47708ff233..97d31d539ad7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -811,6 +811,8 @@ static void blk_release_queue(struct kobject *kobj) blk_queue_free_zone_bitmaps(q); + blk_disable_sub_page_limits(&q->limits); + if (queue_is_mq(q)) blk_mq_release(q); diff --git a/block/blk.h b/block/blk.h index f10c848d91a3..fa606c0bb7aa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -16,6 +16,7 @@ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; +DECLARE_STATIC_KEY_FALSE(blk_sub_page_limits); struct internal_request_queue { struct request_queue q; @@ -61,6 +62,14 @@ struct blk_flush_queue 
*blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); +static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim) +{ + return static_branch_unlikely(&blk_sub_page_limits) && + lim->sub_page_limits; +} + +void blk_disable_sub_page_limits(struct queue_limits *q); + void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5f771925d439..f8198834edec 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -328,6 +328,11 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; + +#ifndef __GENKSYMS__ + bool sub_page_limits; +#endif + enum blk_zoned_model zoned; ANDROID_KABI_RESERVE(1); From e99e7de8a6e4fb0c1b51ff1abc6f9042a4c7ae74 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:11 -0700 Subject: [PATCH 04/98] ANDROID: block: Support submitting passthrough requests with small segments If the segment size is smaller than the page size there may be multiple segments per bvec even if a bvec only contains a single page. Hence this patch. 
Bug: 308663717 Bug: 319125789 Change-Id: I446aab83a2c519cb3c42d5d8ffd814dcc34274d2 Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- block/blk-map.c | 2 +- block/blk.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/block/blk-map.c b/block/blk-map.c index c7f71d83eff1..e08e79142d24 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -486,7 +486,7 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) unsigned int nr_segs = 0; bio_for_each_bvec(bv, bio, iter) - nr_segs++; + nr_segs += blk_segments(&rq->q->limits, bv.bv_len); if (!rq->bio) { blk_rq_bio_prep(rq, bio, nr_segs); diff --git a/block/blk.h b/block/blk.h index fa606c0bb7aa..3f9e259d13f6 100644 --- a/block/blk.h +++ b/block/blk.h @@ -79,6 +79,24 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); +/* Number of DMA segments required to transfer @bytes data. */ +static inline unsigned int blk_segments(const struct queue_limits *limits, + unsigned int bytes) +{ + if (!blk_queue_sub_page_limits(limits)) + return 1; + + { + const unsigned int mss = limits->max_segment_size; + + if (bytes <= mss) + return 1; + if (is_power_of_2(mss)) + return round_up(bytes, mss) >> ilog2(mss); + return (bytes + mss - 1) / mss; + } +} + static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { From bed88e7c4f1b6d86e76c64ca8eed6b0ce8a98e0d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:12 -0700 Subject: [PATCH 05/98] ANDROID: block: Add support for filesystem requests and small segments Add support in the bio splitting code and also in the bio submission code for bios with segments smaller than the page size. 
Bug: 308663717 Bug: 319125789 Change-Id: Iea511675ad96b0c8255c2b87811ad33c3a02c8fa Signed-off-by: Bart Van Assche [jyescas@google.com: the function bio_may_exceed_limits() does not exists in the 5.15 kernel, so the sub page limit code in block/blk.h was moved to the function __blk_queue_split() in block/blk-merge.c] Signed-off-by: Juan Yescas --- block/blk-merge.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 1affc5fd35f0..79bfae7ff221 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -265,7 +265,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (nsegs < max_segs && sectors + (bv.bv_len >> 9) <= max_sectors && bv.bv_offset + bv.bv_len <= PAGE_SIZE) { - nsegs++; + /* single-page bvec optimization */ + nsegs += blk_segments(&q->limits, bv.bv_len); sectors += bv.bv_len >> 9; } else if (bvec_split_segs(q, &bv, &nsegs, §ors, max_segs, max_sectors)) { @@ -333,18 +334,17 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) break; default: /* - * All drivers must accept single-segments bios that are <= - * PAGE_SIZE. This is a quick and dirty check that relies on - * the fact that bi_io_vec[0] is always valid if a bio has data. - * The check might lead to occasional false negatives when bios - * are cloned, but compared to the performance impact of cloned - * bios themselves the loop below doesn't matter anyway. + * Check whether bio splitting should be performed. This check may + * trigger the bio splitting code even if splitting is not necessary. 
*/ if (!q->limits.chunk_sectors && (*bio)->bi_vcnt == 1 && + (!blk_queue_sub_page_limits(&q->limits) || + (*bio)->bi_io_vec->bv_len <= q->limits.max_segment_size) && ((*bio)->bi_io_vec[0].bv_len + (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) { - *nr_segs = 1; + *nr_segs = blk_segments(&q->limits, + (*bio)->bi_io_vec[0].bv_len); break; } split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); @@ -519,7 +519,10 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg)) goto next_bvec; - if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE) + if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE && + (!blk_queue_sub_page_limits(&q->limits) || + bvec.bv_len <= q->limits.max_segment_size)) + /* single-segment bvec optimization */ nsegs += __blk_bvec_map_sg(bvec, sglist, sg); else nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg); From 3ef8e9009c277711444a1ed95cc93e4a45ddb5ef Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:10 -0700 Subject: [PATCH 06/98] ANDROID: block: Make sub_page_limit_queues available in debugfs This new debugfs attribute makes it easier to verify the code that tracks how many queues require limits below the page size. Bug: 308663717 Bug: 319125789 Change-Id: I855113cd3898f9641c7c3f1b4732bf4990fc7d3d Signed-off-by: Bart Van Assche [jyescas@google.com: Wrap #include "blk-mq-debugfs.h" with #ifndef __GENKSYSM__ to avoid ABI CRC changes.] 
Signed-off-by: Juan Yescas --- block/blk-core.c | 5 +++++ block/blk-mq-debugfs.c | 9 +++++++++ block/blk-mq-debugfs.h | 6 ++++++ block/blk-settings.c | 8 ++++++++ block/blk.h | 1 + 5 files changed, 29 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 47667004fdf0..195ac33f19f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -46,7 +46,11 @@ #include #include "blk.h" + #include "blk-mq.h" +#ifndef __GENKSYMS__ +#include "blk-mq-debugfs.h" +#endif #include "blk-mq-sched.h" #include "blk-pm.h" #ifndef __GENKSYMS__ @@ -1786,6 +1790,7 @@ int __init blk_dev_init(void) sizeof(struct internal_request_queue), 0, SLAB_PANIC, NULL); blk_debugfs_root = debugfs_create_dir("block", NULL); + blk_mq_debugfs_init(); return 0; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7023257a133d..5a5cfae86073 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -1013,3 +1013,12 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } + +DEFINE_DEBUGFS_ATTRIBUTE(blk_sub_page_limit_queues_fops, + blk_sub_page_limit_queues_get, NULL, "%llu\n"); + +void blk_mq_debugfs_init(void) +{ + debugfs_create_file("sub_page_limit_queues", 0400, blk_debugfs_root, + NULL, &blk_sub_page_limit_queues_fops); +} diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index a68aa6041a10..997cc17a692a 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -15,6 +15,8 @@ struct blk_mq_debugfs_attr { const struct seq_operations *seq_ops; }; +void blk_mq_debugfs_init(void); + int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); @@ -36,6 +38,10 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else +static inline void 
blk_mq_debugfs_init(void) +{ +} + static inline void blk_mq_debugfs_register(struct request_queue *q) { } diff --git a/block/blk-settings.c b/block/blk-settings.c index e416616bfc5a..932c82e77cf8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -110,6 +110,14 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) } EXPORT_SYMBOL(blk_queue_bounce_limit); +/* For debugfs. */ +int blk_sub_page_limit_queues_get(void *data, u64 *val) +{ + *val = READ_ONCE(blk_nr_sub_page_limit_queues); + + return 0; +} + /** * blk_enable_sub_page_limits - enable support for limits below the page size * @lim: request queue limits for which to enable support of these features. diff --git a/block/blk.h b/block/blk.h index 3f9e259d13f6..ec3713bdfed2 100644 --- a/block/blk.h +++ b/block/blk.h @@ -68,6 +68,7 @@ static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim) lim->sub_page_limits; } +int blk_sub_page_limit_queues_get(void *data, u64 *val); void blk_disable_sub_page_limits(struct queue_limits *q); void blk_freeze_queue(struct request_queue *q); From 0ffd03e67d6dab338fd8c13a916acd1936353925 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:13 -0700 Subject: [PATCH 07/98] ANDROID: scsi_debug: Support configuring the maximum segment size Add a kernel module parameter for configuring the maximum segment size. This patch enables testing SCSI support for segments smaller than the page size. 
Bug: 308663717 Bug: 319125789 Change-Id: Ib645dead4e10d4aaf5a1dfc1d064038153737bc1 Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- drivers/scsi/scsi_debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index e4f6bb3470e5..c8e4479b2144 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -752,6 +752,7 @@ static int sdebug_host_max_queue; /* per host */ static int sdebug_lowest_aligned = DEF_LOWEST_ALIGNED; static int sdebug_max_luns = DEF_MAX_LUNS; static int sdebug_max_queue = SDEBUG_CANQUEUE; /* per submit queue */ +static unsigned int sdebug_max_segment_size = BLK_MAX_SEGMENT_SIZE; static unsigned int sdebug_medium_error_start = OPT_MEDIUM_ERR_ADDR; static int sdebug_medium_error_count = OPT_MEDIUM_ERR_NUM; static atomic_t retired_max_queue; /* if > 0 then was prior max_queue */ @@ -5775,6 +5776,7 @@ module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO); module_param_named(lun_format, sdebug_lun_am_i, int, S_IRUGO | S_IWUSR); module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR); module_param_named(max_queue, sdebug_max_queue, int, S_IRUGO | S_IWUSR); +module_param_named(max_segment_size, sdebug_max_segment_size, uint, S_IRUGO); module_param_named(medium_error_count, sdebug_medium_error_count, int, S_IRUGO | S_IWUSR); module_param_named(medium_error_start, sdebug_medium_error_start, int, @@ -5851,6 +5853,7 @@ MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)"); MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method"); MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); MODULE_PARM_DESC(max_queue, "max number of queued commands (1 to max(def))"); +MODULE_PARM_DESC(max_segment_size, "max bytes in a single segment"); MODULE_PARM_DESC(medium_error_count, "count of sectors to return follow on MEDIUM error"); MODULE_PARM_DESC(medium_error_start, "starting sector number to 
return MEDIUM error"); MODULE_PARM_DESC(ndelay, "response delay in nanoseconds (def=0 -> ignore)"); @@ -7725,6 +7728,7 @@ static int sdebug_driver_probe(struct device *dev) sdebug_driver_template.can_queue = sdebug_max_queue; sdebug_driver_template.cmd_per_lun = sdebug_max_queue; + sdebug_driver_template.max_segment_size = sdebug_max_segment_size; if (!sdebug_clustering) sdebug_driver_template.dma_boundary = PAGE_SIZE - 1; From ff1e211db631c81704bbf0cb1cb2de697b305a4f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:14 -0700 Subject: [PATCH 08/98] ANDROID: null_blk: Support configuring the maximum segment size Add support for configuring the maximum segment size. Add support for segments smaller than the page size. This patch enables testing segments smaller than the page size with a driver that does not call blk_rq_map_sg(). Bug: 308663717 Bug: 319125789 Change-Id: I74165d83e71201116378c4598c2f9a2ff8c8b623 Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- drivers/block/null_blk/main.c | 18 +++++++++++++++--- drivers/block/null_blk/null_blk.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 344e3859bb1e..0d16bd6de5b7 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -156,6 +156,10 @@ static int g_max_sectors; module_param_named(max_sectors, g_max_sectors, int, 0444); MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)"); +static unsigned int g_max_segment_size = BLK_MAX_SEGMENT_SIZE; +module_param_named(max_segment_size, g_max_segment_size, int, 0444); +MODULE_PARM_DESC(max_segment_size, "Maximum size of a segment in bytes"); + static unsigned int nr_devices = 1; module_param(nr_devices, uint, 0444); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); @@ -351,6 +355,7 @@ NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); 
NULLB_DEVICE_ATTR(blocksize, uint, NULL); NULLB_DEVICE_ATTR(max_sectors, uint, NULL); +NULLB_DEVICE_ATTR(max_segment_size, uint, NULL); NULLB_DEVICE_ATTR(irqmode, uint, NULL); NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); NULLB_DEVICE_ATTR(index, uint, NULL); @@ -470,6 +475,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_queue_mode, &nullb_device_attr_blocksize, &nullb_device_attr_max_sectors, + &nullb_device_attr_max_segment_size, &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, @@ -541,7 +547,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n"); + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,max_segment_size,virt_boundary\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -597,6 +603,7 @@ static struct nullb_device *null_alloc_dev(void) dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; dev->max_sectors = g_max_sectors; + dev->max_segment_size = g_max_segment_size; dev->irqmode = g_irqmode; dev->hw_queue_depth = g_hw_queue_depth; dev->blocking = g_blocking; @@ -1135,6 +1142,8 @@ static int null_transfer(struct nullb *nullb, struct page *page, unsigned int valid_len = len; int err = 0; + WARN_ONCE(len > dev->max_segment_size, "%u > %u\n", len, + dev->max_segment_size); if (!is_write) { if (dev->zoned) valid_len = null_zone_valid_read_len(nullb, @@ -1170,7 +1179,8 @@ static int null_handle_rq(struct nullb_cmd *cmd) spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { - len = bvec.bv_len; + len = min(bvec.bv_len, nullb->dev->max_segment_size); + bvec.bv_len = len; err = 
null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(req_op(rq)), sector, rq->cmd_flags & REQ_FUA); @@ -1197,7 +1207,8 @@ static int null_handle_bio(struct nullb_cmd *cmd) spin_lock_irq(&nullb->lock); bio_for_each_segment(bvec, bio, iter) { - len = bvec.bv_len; + len = min(bvec.bv_len, nullb->dev->max_segment_size); + bvec.bv_len = len; err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(bio_op(bio)), sector, bio->bi_opf & REQ_FUA); @@ -1904,6 +1915,7 @@ static int null_add_dev(struct nullb_device *dev) dev->max_sectors = min_t(unsigned int, dev->max_sectors, BLK_DEF_MAX_SECTORS); blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); + blk_queue_max_segment_size(nullb->q, dev->max_segment_size); if (dev->virt_boundary) blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 277571c502d9..430b793c1575 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -88,6 +88,7 @@ struct nullb_device { unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */ unsigned int max_sectors; /* Max sectors per command */ + unsigned int max_segment_size; /* Max size of a single DMA segment. */ unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ From 288abb8b19f74fd2f200b5e65c401edbebbb41cb Mon Sep 17 00:00:00 2001 From: Sajid Dalvi Date: Tue, 20 Feb 2024 13:20:17 -0600 Subject: [PATCH 09/98] ANDROID: PCI: dwc: Wait for the link only if it has been started In dw_pcie_host_init() regardless of whether the link has been started or not, the code waits for the link to come up. Even in cases where start_link() is not defined the code ends up spinning in a loop for 1 second. 
Since in some systems dw_pcie_host_init() gets called during probe, this one second loop for each pcie interface instance ends up extending the boot time. Wait for the link up in only if the start_link() is defined. The patch submitted to the upstream kernel (see link below) was not accepted due to no upstream user. The change here is a simplified version of that patch, which will wait for a link only if start_link ops has been defined. Also, this patch was already applied before in https://r.android.com/2548250 but the functionality was lost after https://lore.kernel.org/all/20220624143428.8334-14-Sergey.Semin@baikalelectronics.ru/ was pulled in from the LTS merge. This patch restores the functionality (of removing the delay) which was lost during the LTS merge. Bug: 315052790 Link: https://lore.kernel.org/all/20240112093006.2832105-1-ajayagarwal@google.com/ Change-Id: I4e8d00f6195062728417e41ddd51072880676920 Signed-off-by: Sajid Dalvi --- drivers/pci/controller/dwc/pcie-designware-host.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index fe0fd88e95eb..f0967cbb48f2 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -411,8 +411,10 @@ int dw_pcie_host_init(struct pcie_port *pp) if (ret) goto err_free_msi; - /* Ignore errors, the link may come up later */ - dw_pcie_wait_for_link(pci); + if (pci->ops && pci->ops->start_link) { + /* Ignore errors, the link may come up later */ + dw_pcie_wait_for_link(pci); + } } bridge->sysdata = pp; From 4403e2517ad8b4738b8de62f94f8ef08cb093a8a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Feb 2024 18:49:51 +0100 Subject: [PATCH 10/98] UPSTREAM: netfilter: nft_set_rbtree: skip end interval element from gc commit 60c0c230c6f046da536d3df8b39a20b9a9fd6af0 upstream. 
rbtree lazy gc on insert might collect an end interval element that has been just added in this transactions, skip end interval elements that are not yet active. Bug: 325477234 Fixes: f718863aca46 ("netfilter: nft_set_rbtree: fix overlap expiration walk") Cc: stable@vger.kernel.org Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1296c110c5a0b45a8fcf58e7d18bc5da61a565cb) Signed-off-by: Lee Jones Change-Id: I42f7bca418d47948292b15ace9f371b81ccd7fe8 --- net/netfilter/nft_set_rbtree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a71e..5bf5572e945c 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -235,7 +235,7 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -254,7 +254,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -365,7 +365,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); From d96725ec1af6c6e58c981e45669160357967ff4c Mon Sep 17 00:00:00 2001 From: Vilas Bhat Date: Fri, 23 Feb 2024 12:27:51 -0800 
Subject: [PATCH 11/98] BACKPORT: FROMGIT: PM: runtime: add tracepoint for runtime_status changes Existing runtime PM ftrace events (`rpm_suspend`, `rpm_resume`, `rpm_return_int`) offer limited visibility into the exact timing of device runtime power state transitions, particularly when asynchronous operations are involved. When the `rpm_suspend` or `rpm_resume` functions are invoked with the `RPM_ASYNC` flag, a return value of 0 i.e., success merely indicates that the device power state request has been queued, not that the device has yet transitioned. A new ftrace event, `rpm_status`, is introduced. This event directly logs the `power.runtime_status` value of a device whenever it changes providing granular tracking of runtime power state transitions regardless of synchronous or asynchronous `rpm_suspend` / `rpm_resume` usage. Signed-off-by: Vilas Bhat Signed-off-by: Rafael J. Wysocki Bug: 325508361 (cherry picked from commit 015abee404760249a5c968b9ce29216b94b8ced1 https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next) [vilasbhat: Removed reference to RPM_INVALID from patch as it does not exist in 5.15] Change-Id: Iad7cae74c41b23b430331379c180b5e59bc32c40 Signed-off-by: Vilas Bhat --- drivers/base/power/runtime.c | 1 + include/trace/events/rpm.h | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 5824d41a0b74..c94699018b9d 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -93,6 +93,7 @@ static void update_pm_runtime_accounting(struct device *dev) static void __update_runtime_status(struct device *dev, enum rpm_status status) { update_pm_runtime_accounting(dev); + trace_rpm_status(dev, status); dev->power.runtime_status = status; } diff --git a/include/trace/events/rpm.h b/include/trace/events/rpm.h index 3c716214dab1..c5763327ab6c 100644 --- a/include/trace/events/rpm.h +++ b/include/trace/events/rpm.h @@ 
-101,6 +101,47 @@ TRACE_EVENT(rpm_return_int, __entry->ret) ); +#define RPM_STATUS_STRINGS \ + EM(RPM_ACTIVE, "RPM_ACTIVE") \ + EM(RPM_RESUMING, "RPM_RESUMING") \ + EM(RPM_SUSPENDED, "RPM_SUSPENDED") \ + EMe(RPM_SUSPENDING, "RPM_SUSPENDING") + +/* Enums require being exported to userspace, for user tool parsing. */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +RPM_STATUS_STRINGS + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(rpm_status, + TP_PROTO(struct device *dev, enum rpm_status status), + TP_ARGS(dev, status), + + TP_STRUCT__entry( + __string(name, dev_name(dev)) + __field(int, status) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->status = status; + ), + + TP_printk("%s status=%s", __get_str(name), + __print_symbolic(__entry->status, RPM_STATUS_STRINGS)) +); + #endif /* _TRACE_RUNTIME_POWER_H */ /* This part must be outside protection */ From f115661832fb68e1b78a3f1be78de0927afe0a88 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Feb 2024 17:57:16 +0000 Subject: [PATCH 12/98] Revert "interconnect: Teach lockdep about icc_bw_lock order" This reverts commit e3a29b80e9e6df217dd61c670ac42864fa4a0e67 which is commit 13619170303878e1dae86d9a58b039475c957fcf upstream. It is reported to cause crashes, so revert it for now. 
Bug: 326555421 Signed-off-by: Greg Kroah-Hartman Change-Id: I3dd7744a9b706a959cbed3a793be668147dfcb9a --- drivers/interconnect/core.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index b7c41bd7409c..1ea0d43ca3ae 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1136,21 +1136,13 @@ void icc_sync_state(struct device *dev) } } } - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_sync_state); static int __init icc_init(void) { - struct device_node *root; - - /* Teach lockdep about lock ordering wrt. shrinker: */ - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&icc_bw_lock); - fs_reclaim_release(GFP_KERNEL); - - root = of_find_node_by_path("/"); + struct device_node *root = of_find_node_by_path("/"); providers_count = of_count_icc_providers(root); of_node_put(root); From b74b4cbe62eda8dc8bd844c0af41dddd3890aa4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Feb 2024 17:58:16 +0000 Subject: [PATCH 13/98] Revert "interconnect: Fix locking for runpm vs reclaim" This reverts commit 9be2957f014d91088db1eb5dd09d9a03d7184dce which is commit af42269c3523492d71ebbe11fefae2653e9cdc78 upstream. It is reported to cause crashes, so revert it for now. 
Bug: 326555421 Change-Id: I2fb3626c306e0444f4e0eb42a95488e688942ba9 Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1ea0d43ca3ae..f93678096326 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -30,7 +30,6 @@ static LIST_HEAD(icc_providers); static int providers_count; static bool synced_state; static DEFINE_MUTEX(icc_lock); -static DEFINE_MUTEX(icc_bw_lock); static struct dentry *icc_debugfs_dir; static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) @@ -634,7 +633,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) if (WARN_ON(IS_ERR(path) || !path->num_nodes)) return -EINVAL; - mutex_lock(&icc_bw_lock); + mutex_lock(&icc_lock); old_avg = path->reqs[0].avg_bw; old_peak = path->reqs[0].peak_bw; @@ -666,7 +665,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) apply_constraints(path); } - mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); trace_icc_set_bw_end(path, ret); @@ -969,7 +968,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); node->provider = provider; list_add_tail(&node->node_list, &provider->nodes); @@ -995,7 +993,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = 0; node->peak_bw = 0; - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_node_add); @@ -1123,7 +1120,6 @@ void icc_sync_state(struct device *dev) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); synced_state = true; list_for_each_entry(p, &icc_providers, provider_list) { dev_dbg(p->dev, "interconnect provider is in synced state\n"); From 9cef46f39ee92235f328de3bc65c1b765a200613 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 29 Feb 2024 22:35:14 +0000 Subject: [PATCH 14/98] ANDROID: remove 
LTO check from build.config.gki.aarch64.fips140 Don't check the "LTO" variable in build.config.gki.aarch64.fips140, since fips140.ko no longer depends on LTO. Also, Kleaf doesn't set the "LTO" variable anyway; it was specific to build.sh. Bug: 188620248 Change-Id: I213faa4c8c1a23898c08da121b0a5dc602b7218a Signed-off-by: Eric Biggers --- build.config.gki.aarch64.fips140 | 5 ----- 1 file changed, 5 deletions(-) diff --git a/build.config.gki.aarch64.fips140 b/build.config.gki.aarch64.fips140 index ec493efc20cf..031695579fc8 100644 --- a/build.config.gki.aarch64.fips140 +++ b/build.config.gki.aarch64.fips140 @@ -10,11 +10,6 @@ MAKE_GOALS=" modules " -if [ "${LTO}" = "none" ]; then - echo "The FIPS140 module needs LTO to be enabled." - exit 1 -fi - MODULES_ORDER=android/gki_aarch64_fips140_modules KERNEL_DIR=common From ee9964b308a6ee11d1cf5ee778dc91030f084489 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Tue, 16 Jan 2024 20:20:25 +0000 Subject: [PATCH 15/98] ANDROID: userfaultfd: allow SPF for UFFD_FEATURE_SIGBUS on private+anon Currently we bail out of speculative page fault when we detect that the fault address is in a userfaultfd registered vma. However, if userfaultfd is being used with UFFD_FEATURE_SIGBUS feature, then handle_userfault() doesn't do much and is easiest to handle with SPF. This patch lets MISSING userfaultfs on private anonymous mappings be allowed with SPF if UFFD_FEATURE_SIGBUS is used. With this patch we get >99% success rate for userfaults caused during userfaultfd GC's compaction phase. This translates into eliminating uninterruptible sleep time in do_page_fault() due to userfaults. ABI breakage note: 'userfaultfd_ctx' struct, which has been modified in this CL, is private and hence cannot cause real breakage. 
Bug: 324640390 Bug: 320478828 Signed-off-by: Lokesh Gidra Change-Id: Ic7fde0fde03602b35179bc0cf891ddbbc434190f --- fs/userfaultfd.c | 96 ++++++++++++++++++++++++++--------- include/linux/mm_types.h | 2 +- include/linux/userfaultfd_k.h | 12 ++++- mm/memory.c | 21 +++++++- mm/userfaultfd.c | 2 +- 5 files changed, 104 insertions(+), 29 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 232861e8aad6..b4c24753ec19 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -71,6 +71,7 @@ struct userfaultfd_ctx { atomic_t mmap_changing; /* mm with one ore more vmas attached to this userfaultfd_ctx */ struct mm_struct *mm; + struct rcu_head rcu_head; }; struct userfaultfd_fork_ctx { @@ -156,6 +157,13 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) refcount_inc(&ctx->refcount); } +static void __free_userfaultfd_ctx(struct rcu_head *head) +{ + struct userfaultfd_ctx *ctx = container_of(head, struct userfaultfd_ctx, + rcu_head); + kmem_cache_free(userfaultfd_ctx_cachep, ctx); +} + /** * userfaultfd_ctx_put - Releases a reference to the internal userfaultfd * context. @@ -176,7 +184,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock)); VM_BUG_ON(waitqueue_active(&ctx->fd_wqh)); mmdrop(ctx->mm); - kmem_cache_free(userfaultfd_ctx_cachep, ctx); + call_rcu(&ctx->rcu_head, __free_userfaultfd_ctx); } } @@ -350,6 +358,24 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags) return TASK_UNINTERRUPTIBLE; } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +bool userfaultfd_using_sigbus(struct vm_area_struct *vma) +{ + struct userfaultfd_ctx *ctx; + bool ret; + + /* + * Do it inside RCU section to ensure that the ctx doesn't + * disappear under us. 
+ */ + rcu_read_lock(); + ctx = rcu_dereference(vma->vm_userfaultfd_ctx.ctx); + ret = ctx && (ctx->features & UFFD_FEATURE_SIGBUS); + rcu_read_unlock(); + return ret; +} +#endif + /* * The locking rules involved in returning VM_FAULT_RETRY depending on * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and @@ -394,7 +420,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) */ mmap_assert_locked(mm); - ctx = vmf->vma->vm_userfaultfd_ctx.ctx; + ctx = rcu_dereference_protected(vmf->vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); if (!ctx) goto out; @@ -611,8 +638,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ mmap_write_lock(mm); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + if (rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) == + release_new_ctx) { + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, + NULL); vma->vm_flags &= ~__VM_UFFD_FLAGS; } mmap_write_unlock(mm); @@ -643,9 +672,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) struct userfaultfd_ctx *ctx = NULL, *octx; struct userfaultfd_fork_ctx *fctx; - octx = vma->vm_userfaultfd_ctx.ctx; + octx = rcu_dereference_protected( + vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&vma->vm_mm->mmap_lock)); + if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); vma->vm_flags &= ~__VM_UFFD_FLAGS; return 0; } @@ -682,7 +714,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) list_add_tail(&fctx->list, fcs); } - vma->vm_userfaultfd_ctx.ctx = ctx; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx); return 0; } @@ -715,7 +747,8 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, { struct userfaultfd_ctx *ctx; - ctx = 
vma->vm_userfaultfd_ctx.ctx; + ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&vma->vm_mm->mmap_lock)); if (!ctx) return; @@ -726,7 +759,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, atomic_inc(&ctx->mmap_changing); } else { /* Drop uffd context if remap feature not enabled */ - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); vma->vm_flags &= ~__VM_UFFD_FLAGS; } } @@ -763,7 +796,8 @@ bool userfaultfd_remove(struct vm_area_struct *vma, struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue ewq; - ctx = vma->vm_userfaultfd_ctx.ctx; + ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) return true; @@ -801,7 +835,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, { for ( ; vma && vma->vm_start < end; vma = vma->vm_next) { struct userfaultfd_unmap_ctx *unmap_ctx; - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; + struct userfaultfd_ctx *ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&vma->vm_mm->mmap_lock)); if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || has_unmap_ctx(ctx, unmaps, start, end)) @@ -866,10 +902,13 @@ static int userfaultfd_release(struct inode *inode, struct file *file) mmap_write_lock(mm); prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); - BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^ + BUG_ON(!!cur_uffd_ctx ^ !!(vma->vm_flags & __VM_UFFD_FLAGS)); - if (vma->vm_userfaultfd_ctx.ctx != ctx) { + if (cur_uffd_ctx != ctx) { prev = vma; continue; } @@ -884,7 +923,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) else prev = vma; vma->vm_flags = new_flags; - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + 
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); } mmap_write_unlock(mm); mmput(mm); @@ -1350,9 +1389,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, found = false; basic_ioctls = false; for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(cur->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); - BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ + BUG_ON(!!cur_uffd_ctx ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* check not compatible vmas */ @@ -1395,8 +1437,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * wouldn't know which one to deliver the userfaults to. */ ret = -EBUSY; - if (cur->vm_userfaultfd_ctx.ctx && - cur->vm_userfaultfd_ctx.ctx != ctx) + if (cur_uffd_ctx && cur_uffd_ctx != ctx) goto out_unlock; /* @@ -1414,18 +1455,20 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ret = 0; do { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); BUG_ON(!vma_can_userfault(vma, vm_flags)); - BUG_ON(vma->vm_userfaultfd_ctx.ctx && - vma->vm_userfaultfd_ctx.ctx != ctx); + BUG_ON(cur_uffd_ctx && cur_uffd_ctx != ctx); WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this * userfaultfd and with the right tracking mode too. */ - if (vma->vm_userfaultfd_ctx.ctx == ctx && + if (cur_uffd_ctx == ctx && (vma->vm_flags & vm_flags) == vm_flags) goto skip; @@ -1460,7 +1503,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the current one has not been updated yet. 
*/ vma->vm_flags = new_flags; - vma->vm_userfaultfd_ctx.ctx = ctx; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx); if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) hugetlb_unshare_all_pmds(vma); @@ -1559,7 +1602,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { cond_resched(); - BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ + BUG_ON(!!rcu_access_pointer(cur->vm_userfaultfd_ctx.ctx) ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* @@ -1581,6 +1624,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, ret = 0; do { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); BUG_ON(!vma_can_userfault(vma, vma->vm_flags)); @@ -1589,7 +1635,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * Nothing to do: this vma is already registered into this * userfaultfd and with the right tracking mode too. */ - if (!vma->vm_userfaultfd_ctx.ctx) + if (!cur_uffd_ctx) goto skip; WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); @@ -1608,7 +1654,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range range; range.start = start; range.len = vma_end - start; - wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); + wake_userfault(cur_uffd_ctx, &range); } new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; @@ -1637,7 +1683,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the current one has not been updated yet. 
*/ vma->vm_flags = new_flags; - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); skip: prev = vma; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 42786e6364ef..076bb5eb99f2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -311,7 +311,7 @@ struct vm_region { #ifdef CONFIG_USERFAULTFD #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) struct vm_userfaultfd_ctx { - struct userfaultfd_ctx *ctx; + struct userfaultfd_ctx __rcu *ctx; }; #else /* CONFIG_USERFAULTFD */ #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 33cea484d1ad..8ea2827a4eba 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,6 +36,9 @@ extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +extern bool userfaultfd_using_sigbus(struct vm_area_struct *vma); +#endif /* * The mode of operation for __mcopy_atomic and its helpers. 
@@ -75,7 +78,7 @@ extern int mwriteprotect_range(struct mm_struct *dst_mm, static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { - return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; + return rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) == vm_ctx.ctx; } /* @@ -154,6 +157,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, return VM_FAULT_SIGBUS; } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +static inline bool userfaultfd_using_sigbus(struct vm_area_struct *vma) +{ + return false; +} +#endif + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { diff --git a/mm/memory.c b/mm/memory.c index 54e45571139e..8b10ef4c5d9d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4939,6 +4939,17 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, pgd_t pgdval; p4d_t p4dval; pud_t pudval; + bool uffd_missing_sigbus = false; + +#ifdef CONFIG_USERFAULTFD + /* + * Only support SPF for SIGBUS+MISSING userfaults in private + * anonymous VMAs. + */ + uffd_missing_sigbus = vma_is_anonymous(vma) && + (vma->vm_flags & VM_UFFD_MISSING) && + userfaultfd_using_sigbus(vma); +#endif vmf.seq = seq; @@ -5018,11 +5029,19 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, speculative_page_walk_end(); + if (!vmf.pte && uffd_missing_sigbus) + return VM_FAULT_SIGBUS; + return handle_pte_fault(&vmf); spf_fail: speculative_page_walk_end(); - return VM_FAULT_RETRY; + /* + * Failing page-table walk is similar to page-missing so give an + * opportunity to SIGBUS+MISSING userfault to handle it before + * retrying with mmap_lock + */ + return uffd_missing_sigbus ? 
VM_FAULT_SIGBUS : VM_FAULT_RETRY; } #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index caa13abe0c56..d4175821dd29 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -42,7 +42,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm, * enforce the VM_MAYWRITE check done at uffd registration * time. */ - if (!dst_vma->vm_userfaultfd_ctx.ctx) + if (!rcu_access_pointer(dst_vma->vm_userfaultfd_ctx.ctx)) return NULL; return dst_vma; From 0d0784d6b2c1c583844613c04b8baa8e82f29c9b Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Fri, 9 Feb 2024 23:16:00 +0000 Subject: [PATCH 16/98] ANDROID: Update ABI for userfaultfd_ctx The struct is not public so shouldn't cause real ABI breakage. Bug: 320478828 Bug: 324640390 Change-Id: I724ca4c00bae09bc311d6495383cfd3a77592d7a Signed-off-by: Lokesh Gidra --- android/abi_gki_aarch64.stg | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 1ad3e32c0402..7b680d9ccfa3 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -52829,6 +52829,12 @@ member { type_id: 0x18bd6530 offset: 512 } +member { + id: 0x56a72143 + name: "callback_head" + type_id: 0xe3222f5b + offset: 1024 +} member { id: 0x56a7224a name: "callback_head" @@ -241566,7 +241572,7 @@ struct_union { kind: STRUCT name: "userfaultfd_ctx" definition { - bytesize: 128 + bytesize: 144 member_id: 0x3162bad8 member_id: 0x05e73814 member_id: 0xc409a485 @@ -241578,6 +241584,7 @@ struct_union { member_id: 0x79d263fe member_id: 0xdd180b6b member_id: 0x3025dd18 + member_id: 0x56a72143 } } struct_union { From 2390d5886287ebbf12d7735d4e5ef2fc4a6f2126 Mon Sep 17 00:00:00 2001 From: Ben Fennema Date: Tue, 5 Mar 2024 16:18:31 -0800 Subject: [PATCH 17/98] ANDROID: GKI: Update the ABI symbol list Update the pixel_watch symbol list. 
3 function symbol(s) added 'void gic_resume()' 'ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device*, u8, const void*, size_t)' 'int snd_soc_get_dai_name(const struct of_phandle_args*, const char**)' Bug: 327650099 Change-Id: I7a7efaa91f1a37f44d3e950af4ec9947fb349acc Signed-off-by: Ben Fennema --- android/abi_gki_aarch64.stg | 44 +++++++++++++++++++++++++++++ android/abi_gki_aarch64_pixel_watch | 16 +++++++++++ 2 files changed, 60 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 7b680d9ccfa3..68e2b49e33af 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -265963,6 +265963,14 @@ function { parameter_id: 0x11cffa09 parameter_id: 0x4585663f } +function { + id: 0x16019d86 + return_type_id: 0xd5cc9c9a + parameter_id: 0x09626b7f + parameter_id: 0x295c7202 + parameter_id: 0x391f15ea + parameter_id: 0xf435685e +} function { id: 0x1603f977 return_type_id: 0x48b5725f @@ -280952,6 +280960,12 @@ function { parameter_id: 0x391f15ea parameter_id: 0xf435685e } +function { + id: 0x9294d8c1 + return_type_id: 0x6720d32f + parameter_id: 0x3c01aef6 + parameter_id: 0x051414e1 +} function { id: 0x929694e1 return_type_id: 0x6720d32f @@ -329598,6 +329612,15 @@ elf_symbol { type_id: 0x8e47c273 full_name: "gic_nonsecure_priorities" } +elf_symbol { + id: 0x5ddec58b + name: "gic_resume" + is_defined: true + symbol_type: FUNCTION + crc: 0x14da596c + type_id: 0x10985193 + full_name: "gic_resume" +} elf_symbol { id: 0x596931c4 name: "gic_v3_cpu_init" @@ -336689,6 +336712,15 @@ elf_symbol { type_id: 0x9fd879f0 full_name: "mipi_dsi_dcs_set_tear_off" } +elf_symbol { + id: 0x0d70d901 + name: "mipi_dsi_dcs_write" + is_defined: true + symbol_type: FUNCTION + crc: 0xb1d4f105 + type_id: 0x16019d86 + full_name: "mipi_dsi_dcs_write" +} elf_symbol { id: 0x9d1a3913 name: "mipi_dsi_dcs_write_buffer" @@ -351577,6 +351609,15 @@ elf_symbol { type_id: 0x6eef99a2 full_name: "snd_soc_find_dai_with_mutex" } +elf_symbol { + id: 0x4086fab0 + name: 
"snd_soc_get_dai_name" + is_defined: true + symbol_type: FUNCTION + crc: 0x0082f713 + type_id: 0x9294d8c1 + full_name: "snd_soc_get_dai_name" +} elf_symbol { id: 0x33a917a0 name: "snd_soc_get_enum_double" @@ -365140,6 +365181,7 @@ interface { symbol_id: 0xe87161bc symbol_id: 0x112db471 symbol_id: 0x5a582da8 + symbol_id: 0x5ddec58b symbol_id: 0x596931c4 symbol_id: 0x390427e4 symbol_id: 0x53ba530f @@ -365926,6 +365968,7 @@ interface { symbol_id: 0x6ee51cd8 symbol_id: 0xf096de3c symbol_id: 0x4512217f + symbol_id: 0x0d70d901 symbol_id: 0x9d1a3913 symbol_id: 0xfdf03f19 symbol_id: 0xb0f04be5 @@ -367579,6 +367622,7 @@ interface { symbol_id: 0x687a68d3 symbol_id: 0x97843792 symbol_id: 0xc88e38bb + symbol_id: 0x4086fab0 symbol_id: 0x33a917a0 symbol_id: 0x4c3ba1e9 symbol_id: 0x05ad614a diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index 8455f206c1c5..f60a3092e140 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -844,6 +844,10 @@ get_user_pages get_zeroed_page gic_nonsecure_priorities + gic_resume + gic_v3_cpu_init + gic_v3_dist_init + gic_v3_dist_wait_for_rwp gov_attr_set_init gov_attr_set_put governor_sysfs_ops @@ -1220,6 +1224,7 @@ mipi_dsi_create_packet mipi_dsi_dcs_set_display_brightness mipi_dsi_dcs_set_tear_off + mipi_dsi_dcs_write mipi_dsi_host_register mipi_dsi_host_unregister misc_deregister @@ -1333,6 +1338,7 @@ ns_capable nsecs_to_jiffies ns_to_timespec64 + __num_online_cpus nvmem_cell_get nvmem_cell_put nvmem_cell_read @@ -1734,6 +1740,7 @@ rpmsg_get_signals rpmsg_poll rpmsg_register_device + rpmsg_register_device_override rpmsg_rx_done rpmsg_send rpmsg_set_signals @@ -1939,6 +1946,7 @@ snd_soc_dapm_put_enum_double snd_soc_dapm_put_volsw snd_soc_dapm_sync + snd_soc_get_dai_name snd_soc_get_enum_double snd_soc_get_pcm_runtime snd_soc_get_volsw @@ -2167,12 +2175,16 @@ __traceiter_android_vh_binder_wakeup_ilocked __traceiter_android_vh_cpu_idle_enter 
__traceiter_android_vh_cpu_idle_exit + __traceiter_android_vh_cpuidle_psci_enter + __traceiter_android_vh_cpuidle_psci_exit __traceiter_android_vh_disable_thermal_cooling_stats __traceiter_android_vh_ftrace_dump_buffer __traceiter_android_vh_ftrace_format_check __traceiter_android_vh_ftrace_oops_enter __traceiter_android_vh_ftrace_oops_exit __traceiter_android_vh_ftrace_size_check + __traceiter_android_vh_gic_resume + __traceiter_android_vh_gic_v3_suspend __traceiter_android_vh_ipi_stop __traceiter_android_vh_jiffies_update __traceiter_android_vh_mmc_sdio_pm_flag_set @@ -2242,12 +2254,16 @@ __tracepoint_android_vh_binder_wakeup_ilocked __tracepoint_android_vh_cpu_idle_enter __tracepoint_android_vh_cpu_idle_exit + __tracepoint_android_vh_cpuidle_psci_enter + __tracepoint_android_vh_cpuidle_psci_exit __tracepoint_android_vh_disable_thermal_cooling_stats __tracepoint_android_vh_ftrace_dump_buffer __tracepoint_android_vh_ftrace_format_check __tracepoint_android_vh_ftrace_oops_enter __tracepoint_android_vh_ftrace_oops_exit __tracepoint_android_vh_ftrace_size_check + __tracepoint_android_vh_gic_resume + __tracepoint_android_vh_gic_v3_suspend __tracepoint_android_vh_ipi_stop __tracepoint_android_vh_jiffies_update __tracepoint_android_vh_mmc_sdio_pm_flag_set From 282bfc6c30c060ffe4cf34052aa90d43d6bc07ae Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 27 Feb 2024 21:50:35 +0000 Subject: [PATCH 18/98] UPSTREAM: Revert "usb: typec: class: fix typec_altmode_put_partner to put plugs" This reverts commit b17b7fe6dd5c6ff74b38b0758ca799cdbb79e26e. That commit messed up the reference counting, so it needs to be rethought. 
Fixes: b17b7fe6dd5c ("usb: typec: class: fix typec_altmode_put_partner to put plugs") Cc: stable@vger.kernel.org Cc: RD Babiera Reported-by: Chris Bainbridge Closes: https://lore.kernel.org/lkml/CAP-bSRb3SXpgo_BEdqZB-p1K5625fMegRZ17ZkPE1J8ZYgEHDg@mail.gmail.com/ Signed-off-by: Heikki Krogerus Bug: 324496488 (cherry picked from commit 9c6b789e954fae73c548f39332bcc56bdf0d4373) Signed-off-by: RD Babiera (cherry picked from https://android-review.googlesource.com/q/commit:27167a6e39a6894d905ea97aece1aa9f0120f452) Merged-In: I2755a5e44dd1970d60e5d996dd7fc6d88f79684a Change-Id: I2755a5e44dd1970d60e5d996dd7fc6d88f79684a --- drivers/usb/typec/class.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ea86195c75b8..339752fef65e 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -265,7 +265,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) if (!partner) return; - adev = &altmode->adev; + adev = &partner->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -495,8 +495,7 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - if (!is_typec_port(dev->parent)) - typec_altmode_put_partner(alt); + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); From c3b70e94f10cbac844ae01538d785d971476ef1a Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 27 Feb 2024 21:59:34 +0000 Subject: [PATCH 19/98] UPSTREAM: usb: typec: class: fix typec_altmode_put_partner to put plugs usb: typec: class: fix typec_altmode_put_partner to put plugs When typec_altmode_put_partner is called by a plug altmode upon release, the port altmode the plug belongs to will not remove its reference to the plug. 
The check to see if the altmode being released is a plug evaluates against the released altmode's partner instead of the calling altmode, so change adev in typec_altmode_put_partner to properly refer to the altmode being released. Because typec_altmode_set_partner calls get_device() on the port altmode, add partner_adev that points to the port altmode in typec_put_partner to call put_device() on. typec_altmode_set_partner is not called for port altmodes, so add a check in typec_altmode_release to prevent typec_altmode_put_partner() calls on port altmode release. Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Cc: Co-developed-by: Christian A. Ehrhardt Signed-off-by: Christian A. Ehrhardt Signed-off-by: RD Babiera Tested-by: Christian A. Ehrhardt Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240103181754.2492492-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman Bug: 324496488 (cherry picked from commit 5962ded777d689cd8bf04454273e32228d7fb71f) Signed-off-by: RD Babiera (cherry picked from https://android-review.googlesource.com/q/commit:fe6e5059f610845f3373a734d228148a660ca44e) Merged-In: I688a333753b2e8069c0437a6d9dea30bf029c92c Change-Id: I688a333753b2e8069c0437a6d9dea30bf029c92c --- drivers/usb/typec/class.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 339752fef65e..173d86d120da 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -261,11 +261,13 @@ static void typec_altmode_put_partner(struct altmode *altmode) { struct altmode *partner = altmode->partner; struct typec_altmode *adev; + struct typec_altmode *partner_adev; if (!partner) return; - adev = &partner->adev; + adev = &altmode->adev; + partner_adev = &partner->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -274,7 +276,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) } else { 
partner->partner = NULL; } - put_device(&adev->dev); + put_device(&partner_adev->dev); } /** @@ -495,7 +497,8 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - typec_altmode_put_partner(alt); + if (!is_typec_port(dev->parent)) + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); From 5aed5c34359c94f9dd1d5c815485cf8bdbb8a645 Mon Sep 17 00:00:00 2001 From: lipeifeng Date: Tue, 20 Feb 2024 19:01:27 +0800 Subject: [PATCH 20/98] ANDROID: uid_sys_stat: fix data-error of cputime and io 'commit b6115e14010 ("ANDROID: uid_sys_stat: split the global lock uid_lock to the fine-grained locks for each hlist in hash_table.")' The above patch split the global lock to per-uid lock to reduce lock competition. But result in data-error from uid_cputime_show and uid_io_show in some cases. E.g, if thread1 and thread2 read /proc/uid_cputime/show_uid_stat at the same time, thread2 maybe operate in partA and zero active_stime and active_utime of uid_entry when thread1 is between partB and partC, which would cause thread1 show the error data. static int uid_cputime_show(struct seq_file *m, void *v) { ... /*partA*/ for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { uid_entry->active_stime = 0; uid_entry->active_utime = 0; } unlock_uid_by_bkt(bkt); } rcu_read_lock(); /* partB */ do_each_thread(temp, task) { ... 
lock_uid(uid); if (!(task->flags & PF_EXITING)) { task_cputime_adjusted(task, &utime, &stime); uid_entry->active_utime += utime; uid_entry->active_stime += stime; } unlock_uid(uid); } while_each_thread(temp, task); rcu_read_unlock(); for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { u64 total_utime = uid_entry->utime + uid_entry->active_utime; u64 total_stime = uid_entry->stime + uid_entry->active_stime; /* partC */ seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, ktime_to_us(total_utime), ktime_to_us(total_stime)); } unlock_uid_by_bkt(bkt); } The patch ensures that the calculation and seq_printf of each uid_entry is within the uid_lock range, in order to accurate data. Bug: 278138377 Change-Id: Iaa2ccd95c4b4b333f04b2ba18d7699d94017394e Signed-off-by: lipeifeng (cherry picked from commit ea35d2bd073214e84be242287a2e91741c6588ed) --- drivers/misc/uid_sys_stats.c | 216 ++++++++++++----------------------- 1 file changed, 72 insertions(+), 144 deletions(-) diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index ff6bc1d6fc45..a78be7fb05ff 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -51,12 +51,9 @@ struct io_stats { #define UID_STATE_FOREGROUND 0 #define UID_STATE_BACKGROUND 1 -#define UID_STATE_BUCKET_SIZE 2 - -#define UID_STATE_TOTAL_CURR 2 -#define UID_STATE_TOTAL_LAST 3 -#define UID_STATE_DEAD_TASKS 4 -#define UID_STATE_SIZE 5 +#define UID_STATE_TOTAL_LAST 2 +#define UID_STATE_DEAD_TASKS 3 +#define UID_STATE_SIZE 4 #define MAX_TASK_COMM_LEN 256 @@ -71,8 +68,6 @@ struct uid_entry { uid_t uid; u64 utime; u64 stime; - u64 active_utime; - u64 active_stime; int state; struct io_stats io[UID_STATE_SIZE]; struct hlist_node hash; @@ -173,58 +168,47 @@ static struct uid_entry *find_or_register_uid(uid_t uid) return uid_entry; } +static void calc_uid_cputime(struct uid_entry *uid_entry, + u64 
*total_utime, u64 *total_stime) +{ + struct user_namespace *user_ns = current_user_ns(); + struct task_struct *p, *t; + u64 utime, stime; + uid_t uid; + + rcu_read_lock(); + for_each_process(p) { + uid = from_kuid_munged(user_ns, task_uid(p)); + + if (uid != uid_entry->uid) + continue; + + for_each_thread(p, t) { + /* avoid double accounting of dying threads */ + if (!(t->flags & PF_EXITING)) { + task_cputime_adjusted(t, &utime, &stime); + *total_utime += utime; + *total_stime += stime; + } + } + } + rcu_read_unlock(); +} + static int uid_cputime_show(struct seq_file *m, void *v) { struct uid_entry *uid_entry = NULL; - struct task_struct *task, *temp; - struct user_namespace *user_ns = current_user_ns(); - u64 utime; - u64 stime; u32 bkt; - uid_t uid; for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { + lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - uid_entry->active_stime = 0; - uid_entry->active_utime = 0; - } - unlock_uid_by_bkt(bkt); - } + u64 total_utime = uid_entry->utime; + u64 total_stime = uid_entry->stime; - rcu_read_lock(); - do_each_thread(temp, task) { - uid = from_kuid_munged(user_ns, task_uid(task)); - lock_uid(uid); - - if (!uid_entry || uid_entry->uid != uid) - uid_entry = find_or_register_uid(uid); - if (!uid_entry) { - rcu_read_unlock(); - unlock_uid(uid); - pr_err("%s: failed to find the uid_entry for uid %d\n", - __func__, uid); - return -ENOMEM; - } - /* avoid double accounting of dying threads */ - if (!(task->flags & PF_EXITING)) { - task_cputime_adjusted(task, &utime, &stime); - uid_entry->active_utime += utime; - uid_entry->active_stime += stime; - } - unlock_uid(uid); - } while_each_thread(temp, task); - rcu_read_unlock(); - - for (bkt = 0, uid_entry = NULL; uid_entry == NULL && - bkt < HASH_SIZE(hash_table); bkt++) { - lock_uid_by_bkt(bkt); - hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - u64 total_utime = uid_entry->utime + - uid_entry->active_utime; 
- u64 total_stime = uid_entry->stime + - uid_entry->active_stime; + calc_uid_cputime(uid_entry, &total_utime, &total_stime); seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, ktime_to_us(total_utime), ktime_to_us(total_stime)); } @@ -323,86 +307,52 @@ static void add_uid_io_stats(struct uid_entry *uid_entry, __add_uid_io_stats(uid_entry, &task->ioac, slot); } -static void update_io_stats_all(void) +static void update_io_stats_uid(struct uid_entry *uid_entry) { - struct uid_entry *uid_entry = NULL; - struct task_struct *task, *temp; struct user_namespace *user_ns = current_user_ns(); + struct task_struct *p, *t; + struct io_stats io; + + memset(&io, 0, sizeof(struct io_stats)); + + rcu_read_lock(); + for_each_process(p) { + uid_t uid = from_kuid_munged(user_ns, task_uid(p)); + + if (uid != uid_entry->uid) + continue; + + for_each_thread(p, t) { + /* avoid double accounting of dying threads */ + if (!(t->flags & PF_EXITING)) { + io.read_bytes += t->ioac.read_bytes; + io.write_bytes += compute_write_bytes(&t->ioac); + io.rchar += t->ioac.rchar; + io.wchar += t->ioac.wchar; + io.fsync += t->ioac.syscfs; + } + } + } + rcu_read_unlock(); + + compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io, + &uid_entry->io[UID_STATE_TOTAL_LAST], + &uid_entry->io[UID_STATE_DEAD_TASKS]); +} + +static int uid_io_show(struct seq_file *m, void *v) +{ + + struct uid_entry *uid_entry = NULL; u32 bkt; - uid_t uid; for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, - sizeof(struct io_stats)); - } - unlock_uid_by_bkt(bkt); - } - rcu_read_lock(); - do_each_thread(temp, task) { - uid = from_kuid_munged(user_ns, task_uid(task)); - lock_uid(uid); - if (!uid_entry || uid_entry->uid != uid) - uid_entry = find_or_register_uid(uid); - if (!uid_entry) { - unlock_uid(uid); - continue; - } - add_uid_io_stats(uid_entry, task, 
UID_STATE_TOTAL_CURR); - unlock_uid(uid); - } while_each_thread(temp, task); - rcu_read_unlock(); + update_io_stats_uid(uid_entry); - for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); - bkt++) { - lock_uid_by_bkt(bkt); - hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - compute_io_bucket_stats(&uid_entry->io[uid_entry->state], - &uid_entry->io[UID_STATE_TOTAL_CURR], - &uid_entry->io[UID_STATE_TOTAL_LAST], - &uid_entry->io[UID_STATE_DEAD_TASKS]); - } - unlock_uid_by_bkt(bkt); - } -} - -static void update_io_stats_uid(struct uid_entry *uid_entry) -{ - struct task_struct *task, *temp; - struct user_namespace *user_ns = current_user_ns(); - - memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, - sizeof(struct io_stats)); - - rcu_read_lock(); - do_each_thread(temp, task) { - if (from_kuid_munged(user_ns, task_uid(task)) != uid_entry->uid) - continue; - add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR); - } while_each_thread(temp, task); - rcu_read_unlock(); - - compute_io_bucket_stats(&uid_entry->io[uid_entry->state], - &uid_entry->io[UID_STATE_TOTAL_CURR], - &uid_entry->io[UID_STATE_TOTAL_LAST], - &uid_entry->io[UID_STATE_DEAD_TASKS]); -} - - -static int uid_io_show(struct seq_file *m, void *v) -{ - struct uid_entry *uid_entry; - u32 bkt; - - update_io_stats_all(); - for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); - bkt++) { - - lock_uid_by_bkt(bkt); - hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", uid_entry->uid, uid_entry->io[UID_STATE_FOREGROUND].rchar, @@ -446,7 +396,6 @@ static ssize_t uid_procstat_write(struct file *file, uid_t uid; int argc, state; char input[128]; - struct uid_entry uid_entry_tmp; if (count >= sizeof(input)) return -EINVAL; @@ -475,29 +424,8 @@ static ssize_t uid_procstat_write(struct file *file, return count; } - /* - * Update_io_stats_uid_locked would take a long lock-time of uid_lock 
- * due to call do_each_thread to compute uid_entry->io, which would - * cause to lock competition sometime. - * - * Using uid_entry_tmp to get the result of Update_io_stats_uid, - * so that we can unlock_uid during update_io_stats_uid, in order - * to avoid the unnecessary lock-time of uid_lock. - */ - uid_entry_tmp = *uid_entry; - - unlock_uid(uid); - update_io_stats_uid(&uid_entry_tmp); - - lock_uid(uid); - hlist_for_each_entry(uid_entry, &hash_table[hash_min(uid, HASH_BITS(hash_table))], hash) { - if (uid_entry->uid == uid_entry_tmp.uid) { - memcpy(uid_entry->io, uid_entry_tmp.io, - sizeof(struct io_stats) * UID_STATE_SIZE); - uid_entry->state = state; - break; - } - } + update_io_stats_uid(uid_entry); + uid_entry->state = state; unlock_uid(uid); return count; From ac4797cea53888fe6f0df21c746bb676e360545a Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Thu, 7 Mar 2024 22:09:02 +0000 Subject: [PATCH 21/98] UPSTREAM: usb: typec: altmodes/displayport: create sysfs nodes as driver's default device attribute group The DisplayPort driver's sysfs nodes may be present to the userspace before typec_altmode_set_drvdata() completes in dp_altmode_probe. This means that a sysfs read can trigger a NULL pointer error by deferencing dp->hpd in hpd_show or dp->lock in pin_assignment_show, as dev_get_drvdata() returns NULL in those cases. Remove manual sysfs node creation in favor of adding attribute group as default for devices bound to the driver. The ATTRIBUTE_GROUPS() macro is not used here otherwise the path to the sysfs nodes is no longer compliant with the ABI. 
Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Link: https://lore.kernel.org/r/20240229001101.3889432-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman Bug: 313517804 (cherry picked from commit 165376f6b23e9a779850e750fb2eb06622e5a531) Change-Id: I13aaa956ff2b37e29eb0d90bbac0b7e1ac969b80 Signed-off-by: RD Babiera --- drivers/usb/typec/altmodes/displayport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 8ebcedb682af..213651da2aab 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -546,23 +546,27 @@ static ssize_t hpd_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(hpd); -static struct attribute *dp_altmode_attrs[] = { +static struct attribute *displayport_attrs[] = { &dev_attr_configuration.attr, &dev_attr_pin_assignment.attr, &dev_attr_hpd.attr, NULL }; -static const struct attribute_group dp_altmode_group = { +static const struct attribute_group displayport_group = { .name = "displayport", - .attrs = dp_altmode_attrs, + .attrs = displayport_attrs, +}; + +static const struct attribute_group *displayport_groups[] = { + &displayport_group, + NULL, }; int dp_altmode_probe(struct typec_altmode *alt) { const struct typec_altmode *port = typec_altmode_get_partner(alt); struct dp_altmode *dp; - int ret; /* FIXME: Port can only be DFP_U. 
*/ @@ -573,10 +577,6 @@ int dp_altmode_probe(struct typec_altmode *alt) DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo))) return -ENODEV; - ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group); - if (ret) - return ret; - dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL); if (!dp) return -ENOMEM; @@ -602,7 +602,6 @@ void dp_altmode_remove(struct typec_altmode *alt) { struct dp_altmode *dp = typec_altmode_get_drvdata(alt); - sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group); cancel_work_sync(&dp->work); } EXPORT_SYMBOL_GPL(dp_altmode_remove); @@ -620,6 +619,7 @@ static struct typec_altmode_driver dp_altmode_driver = { .driver = { .name = "typec_displayport", .owner = THIS_MODULE, + .dev_groups = displayport_groups, }, }; module_typec_altmode_driver(dp_altmode_driver); From 37b83a89de429af428fc1c8cfb34c617013be894 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Sep 2023 11:11:00 -0700 Subject: [PATCH 22/98] BACKPORT: f2fs: split initial and dynamic conditions for extent_cache Let's allocate the extent_cache tree without dynamic conditions to avoid a missing condition causing a panic as below. # create a file w/ a compressed flag # disable the compression # panic while updating extent_cache F2FS-fs (dm-64): Swapfile: last extent is not aligned to section F2FS-fs (dm-64): Swapfile (3) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(2097152 * N) Adding 124996k swap on ./swap-file. 
Priority:0 extents:2 across:17179494468k ================================================================== BUG: KASAN: null-ptr-deref in instrument_atomic_read_write out/common/include/linux/instrumented.h:101 [inline] BUG: KASAN: null-ptr-deref in atomic_try_cmpxchg_acquire out/common/include/asm-generic/atomic-instrumented.h:705 [inline] BUG: KASAN: null-ptr-deref in queued_write_lock out/common/include/asm-generic/qrwlock.h:92 [inline] BUG: KASAN: null-ptr-deref in __raw_write_lock out/common/include/linux/rwlock_api_smp.h:211 [inline] BUG: KASAN: null-ptr-deref in _raw_write_lock+0x5a/0x110 out/common/kernel/locking/spinlock.c:295 Write of size 4 at addr 0000000000000030 by task syz-executor154/3327 CPU: 0 PID: 3327 Comm: syz-executor154 Tainted: G O 5.10.185 #1 Hardware name: emulation qemu-x86/qemu-x86, BIOS 2023.01-21885-gb3cc1cd24d 01/01/2023 Call Trace: __dump_stack out/common/lib/dump_stack.c:77 [inline] dump_stack_lvl+0x17e/0x1c4 out/common/lib/dump_stack.c:118 __kasan_report+0x16c/0x260 out/common/mm/kasan/report.c:415 kasan_report+0x51/0x70 out/common/mm/kasan/report.c:428 kasan_check_range+0x2f3/0x340 out/common/mm/kasan/generic.c:186 __kasan_check_write+0x14/0x20 out/common/mm/kasan/shadow.c:37 instrument_atomic_read_write out/common/include/linux/instrumented.h:101 [inline] atomic_try_cmpxchg_acquire out/common/include/asm-generic/atomic-instrumented.h:705 [inline] queued_write_lock out/common/include/asm-generic/qrwlock.h:92 [inline] __raw_write_lock out/common/include/linux/rwlock_api_smp.h:211 [inline] _raw_write_lock+0x5a/0x110 out/common/kernel/locking/spinlock.c:295 __drop_extent_tree+0xdf/0x2f0 out/common/fs/f2fs/extent_cache.c:1155 f2fs_drop_extent_tree+0x17/0x30 out/common/fs/f2fs/extent_cache.c:1172 f2fs_insert_range out/common/fs/f2fs/file.c:1600 [inline] f2fs_fallocate+0x19fd/0x1f40 out/common/fs/f2fs/file.c:1764 vfs_fallocate+0x514/0x9b0 out/common/fs/open.c:310 ksys_fallocate out/common/fs/open.c:333 [inline] __do_sys_fallocate 
out/common/fs/open.c:341 [inline] __se_sys_fallocate out/common/fs/open.c:339 [inline] __x64_sys_fallocate+0xb8/0x100 out/common/fs/open.c:339 do_syscall_64+0x35/0x50 out/common/arch/x86/entry/common.c:46 Bug: 323236756 Cc: stable@vger.kernel.org Fixes: 72840cccc0a1 ("f2fs: allocate the extent_cache by default") Reported-and-tested-by: syzbot+d342e330a37b48c094b7@syzkaller.appspotmail.com Signed-off-by: Jaegeuk Kim (cherry picked from commit f803982190f0265fd36cf84670aa6daefc2b0768) Change-Id: I584b9e37a4790baf5f6613778365b90f128bb765 --- fs/f2fs/extent_cache.c | 53 +++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0e2d49140c07..ad8dfac73bd4 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -74,40 +74,14 @@ static void __set_extent_info(struct extent_info *ei, } } -static bool __may_read_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, READ_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi)) - return false; - return S_ISREG(inode->i_mode); -} - -static bool __may_age_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, AGE_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return false; - if (file_is_cold(inode)) - return false; - - return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); -} - static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) - return __may_read_extent_tree(inode); - else if (type == EX_BLOCK_AGE) - return __may_age_extent_tree(inode); + return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) && + S_ISREG(inode->i_mode); + if (type == EX_BLOCK_AGE) + return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) && + 
(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)); return false; } @@ -120,7 +94,22 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; - return __init_may_extent_tree(inode, type); + if (!__init_may_extent_tree(inode, type)) + return false; + + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(F2FS_I_SB(inode))) + return false; + } else if (type == EX_BLOCK_AGE) { + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + } + return true; } static void __try_update_largest_extent(struct extent_tree *et, From 1225d7ed6c88881aaf342842e683fab14a314af9 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 5 Mar 2024 16:20:32 -0800 Subject: [PATCH 23/98] ANDROID: fuse-bpf: Fix readdir for getdents If you call getdents with a buffer size less than a page, entries can be skipped. This correctly sets the point to continue from. 
Bug: 325550828 Test: getdents with low buffer size Change-Id: I324e7e815d31742bd4e2d70c5d07c2b09a67a7c2 Signed-off-by: Daniel Rosenberg --- fs/fuse/backing.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c index 8b747a7bde58..8485a0efe3db 100644 --- a/fs/fuse/backing.c +++ b/fs/fuse/backing.c @@ -2331,8 +2331,11 @@ static int filldir(struct dir_context *ctx, const char *name, int namelen, return 0; } -static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx) +static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx, + loff_t next_offset) { + char *buffstart = buf; + while (nbytes >= FUSE_NAME_OFFSET) { struct fuse_dirent *dirent = (struct fuse_dirent *) buf; size_t reclen = FUSE_DIRENT_SIZE(dirent); @@ -2346,12 +2349,18 @@ static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx) ctx->pos = dirent->off; if (!dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, - dirent->type)) - break; + dirent->type)) { + // If we can't make any progress, user buffer is too small + if (buf == buffstart) + return -EINVAL; + else + return 0; + } buf += reclen; nbytes -= reclen; } + ctx->pos = next_offset; return 0; } @@ -2398,13 +2407,12 @@ void *fuse_readdir_finalize(struct fuse_bpf_args *fa, struct file *backing_dir = ff->backing_file; int err = 0; - err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx); + err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx, fro->offset); *force_again = !!fro->again; if (*force_again && !*allow_force) err = -EINVAL; - ctx->pos = fro->offset; - backing_dir->f_pos = fro->offset; + backing_dir->f_pos = ctx->pos; free_page((unsigned long) fa->out_args[1].value); return ERR_PTR(err); From b1f8c250264bae2108cb482b2fc63c8f045db106 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Mar 2024 23:24:00 +0000 Subject: [PATCH 24/98] ANDROID: fips140 - add option for debugging the 
integrity check There now have been two times where I've had to debug the fips140 integrity check failing due to a new type of runtime code patching. Debugging such issues requires dumping the text and rodata actually used for the integrity check and comparing them with the originals. Add a kconfig option to make this easier. Similar to CRYPTO_FIPS140_MOD_EVAL_TESTING, the production build won't use this. Bug: 188620248 Change-Id: I392de466ff31f999d65997dbc610e23e9eeca49d Signed-off-by: Eric Biggers --- crypto/Kconfig | 27 +++++++++++++++ crypto/fips140-module.c | 75 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index f41ddc91b7b2..b3728ad6ff79 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -58,6 +58,33 @@ config CRYPTO_FIPS140_MOD_EVAL_TESTING errors and support for a userspace interface to some of the module's services. This option should not be enabled in production builds. +config CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK + bool "Debug the integrity check in FIPS 140 module" + depends on CRYPTO_FIPS140_MOD + help + This option makes the FIPS 140 module provide debugfs files containing + the text and rodata that were used for the integrity check, i.e. the + runtime text and rodata with relocations and code patches unapplied. + This option also makes the module load even if the integrity check + fails so that these files can be used to debug the failure. (A + possible failure mode is that the kernel has added a new type of code + patching and the module needs to be updated to disable or unapply it.) + + This option must not be enabled in production builds. 
+ + Example commands for debugging an integrity check failure: + + adb root + adb shell mount debugfs -t debugfs /sys/kernel/debug + adb shell cp /sys/kernel/debug/fips140/{text,rodata} /data/local/tmp/ + adb pull /data/local/tmp/text text.checked + adb pull /data/local/tmp/rodata rodata.checked + llvm-objcopy -O binary --only-section=.text fips140.ko text.orig + llvm-objcopy -O binary --only-section=.rodata fips140.ko rodata.orig + for f in {text,rodata}.{orig,checked}; do xxd -g1 $f > $f.xxd; done + vimdiff text.{orig,checked}.xxd + vimdiff rodata.{orig,checked}.xxd + config CRYPTO_ALGAPI tristate select CRYPTO_ALGAPI2 diff --git a/crypto/fips140-module.c b/crypto/fips140-module.c index 6412ad6c1234..ed0442b49a43 100644 --- a/crypto/fips140-module.c +++ b/crypto/fips140-module.c @@ -23,6 +23,7 @@ #undef __DISABLE_EXPORTS #include +#include #include #include #include @@ -357,6 +358,67 @@ static void __init unapply_rodata_relocations(void *section, int section_size, } } +#ifdef CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK +static struct { + const void *text; + int textsize; + const void *rodata; + int rodatasize; +} saved_integrity_check_info; + +static ssize_t fips140_text_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(to, count, ppos, + saved_integrity_check_info.text, + saved_integrity_check_info.textsize); +} + +static ssize_t fips140_rodata_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(to, count, ppos, + saved_integrity_check_info.rodata, + saved_integrity_check_info.rodatasize); +} + +static const struct file_operations fips140_text_fops = { + .read = fips140_text_read, +}; + +static const struct file_operations fips140_rodata_fops = { + .read = fips140_rodata_read, +}; + +static void fips140_init_integrity_debug_files(const void *text, int textsize, + const void *rodata, + int rodatasize) +{ + struct dentry *dir; + + dir = 
debugfs_create_dir("fips140", NULL); + + saved_integrity_check_info.text = kmemdup(text, textsize, GFP_KERNEL); + saved_integrity_check_info.textsize = textsize; + if (saved_integrity_check_info.text) + debugfs_create_file("text", 0400, dir, NULL, + &fips140_text_fops); + + saved_integrity_check_info.rodata = kmemdup(rodata, rodatasize, + GFP_KERNEL); + saved_integrity_check_info.rodatasize = rodatasize; + if (saved_integrity_check_info.rodata) + debugfs_create_file("rodata", 0400, dir, NULL, + &fips140_rodata_fops); +} +#else /* CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK */ +static void fips140_init_integrity_debug_files(const void *text, int textsize, + const void *rodata, + int rodatasize) +{ +} +#endif /* !CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK */ + extern struct { u32 offset; u32 count; @@ -398,6 +460,9 @@ static bool __init check_fips140_module_hmac(void) offset_to_ptr(&fips140_rela_rodata.offset), fips140_rela_rodata.count); + fips140_init_integrity_debug_files(textcopy, textsize, + rodatacopy, rodatasize); + fips140_inject_integrity_failure(textcopy); tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); @@ -538,10 +603,14 @@ fips140_init(void) */ if (!check_fips140_module_hmac()) { - pr_crit("integrity check failed -- giving up!\n"); - goto panic; + if (!IS_ENABLED(CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK)) { + pr_crit("integrity check failed -- giving up!\n"); + goto panic; + } + pr_crit("ignoring integrity check failure due to debug mode\n"); + } else { + pr_info("integrity check passed\n"); } - pr_info("integrity check passed\n"); complete_all(&fips140_tests_done); From 7b301c70795e79f1a1cf632fad51d37a0c820232 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Mar 2024 23:24:00 +0000 Subject: [PATCH 25/98] ANDROID: fips140 - fix integrity check by unapplying dynamic SCS Since the kernel now has dynamic Shadow Call Stack (SCS) enabled, on CPUs that don't support Pointer Authentication Codes (PAC) the kernel runtime-patches paciasp 
and autiasp instructions into instructions that push and pop from the shadow call stack. This includes instructions in loaded modules. This broke the fips140 integrity check which needs to know how to undo all text changes made by the module loader in order to re-create the original text. Fix this by updating fips140.ko to undo the dynamic SCS patching. Bug: 188620248 Change-Id: I992bcd6c34b3340c6489b40a125715e1304cb445 Signed-off-by: Eric Biggers --- crypto/fips140-module.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/crypto/fips140-module.c b/crypto/fips140-module.c index ed0442b49a43..cc5c457ddd29 100644 --- a/crypto/fips140-module.c +++ b/crypto/fips140-module.c @@ -358,6 +358,33 @@ static void __init unapply_rodata_relocations(void *section, int section_size, } } +enum { + PACIASP = 0xd503233f, + AUTIASP = 0xd50323bf, + SCS_PUSH = 0xf800865e, + SCS_POP = 0xf85f8e5e, +}; + +/* + * To make the integrity check work with dynamic Shadow Call Stack (SCS), + * replace all instructions that push or pop from the SCS with the Pointer + * Authentication Code (PAC) instructions that were present originally. 
+ */ +static void __init unapply_scs_patch(void *section, int section_size) +{ +#if defined(CONFIG_ARM64) && defined(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) + u32 *insns = section; + int i; + + for (i = 0; i < section_size / sizeof(insns[0]); i++) { + if (insns[i] == SCS_PUSH) + insns[i] = PACIASP; + else if (insns[i] == SCS_POP) + insns[i] = AUTIASP; + } +#endif +} + #ifdef CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK static struct { const void *text; @@ -460,6 +487,8 @@ static bool __init check_fips140_module_hmac(void) offset_to_ptr(&fips140_rela_rodata.offset), fips140_rela_rodata.count); + unapply_scs_patch(textcopy, textsize); + fips140_init_integrity_debug_files(textcopy, textsize, rodatacopy, rodatasize); From 483395b4453aa7e54e88f22bdb43e133bbc9dc1e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 Mar 2024 12:23:56 -0700 Subject: [PATCH 26/98] Revert "ANDROID: Add CONFIG_BLK_DEV_NULL_BLK=m to gki_defconfig" This reverts commit d217ccf7c8e03d9147de4a36ecc337b6a374a704. Debug drivers should not be included in the GKI kernel configuration. Hence this revert. 
Bug: 326456248 Change-Id: I18db9d07ad49b22f09b6b3414d39e6ed0a728d73 Signed-off-by: Bart Van Assche --- arch/arm64/configs/gki_defconfig | 1 - arch/x86/configs/gki_defconfig | 1 - modules.bzl | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index ccd5908ac73a..733f1f2da58a 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -316,7 +316,6 @@ CONFIG_ARM_SCPI_PROTOCOL=y # CONFIG_ARM_SCPI_POWER_DOMAIN is not set # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set CONFIG_GNSS=y -CONFIG_BLK_DEV_NULL_BLK=m CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index ca45275c81e1..2e3d924152bc 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -294,7 +294,6 @@ CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_CACHE is not set CONFIG_GNSS=y CONFIG_OF=y -CONFIG_BLK_DEV_NULL_BLK=m CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 diff --git a/modules.bzl b/modules.bzl index 8f8e07abcec9..d8c38eeb33a9 100644 --- a/modules.bzl +++ b/modules.bzl @@ -8,7 +8,6 @@ This module contains a full list of kernel modules _COMMON_GKI_MODULES_LIST = [ # keep sorted - "drivers/block/null_blk/null_blk.ko", "drivers/block/zram/zram.ko", "drivers/bluetooth/btbcm.ko", "drivers/bluetooth/btqca.ko", From 3fd32dc1716d904707b016615d32344d631256ac Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 14 Mar 2024 15:53:20 -0700 Subject: [PATCH 27/98] ANDROID: fix isolate_migratepages_range return value When [1] was cherry-picked from 5.10 into 5.15 kernel, it modified the variable used to store isolate_migratepages_block() return value like it was done in 5.10. However in 5.15 the variable used to store the return value is different. As a result, failure to isolate a block is not reported back to the caller. 
Fix by restoring the original code and using the right variable to store the return value. [1] ANDROID: mm: do not allow file-backed pages from CMA Bug: 326556976 Change-Id: I06900eb43de356584ff63acfe6e994f11610b494 Signed-off-by: Suren Baghdasaryan --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index ff09d8c4a836..cccb46701c23 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1269,7 +1269,7 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, block_end_pfn, cc->zone)) continue; - pfn = isolate_migratepages_block(&cc_ext, pfn, block_end_pfn, + ret = isolate_migratepages_block(&cc_ext, pfn, block_end_pfn, ISOLATE_UNEVICTABLE); if (ret) From 3673533a09b6998bd0e3eafd14c1d6457bc23529 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Fri, 8 Mar 2024 10:04:39 -0800 Subject: [PATCH 28/98] ANDROID: userfaultfd: add MMAP_TRYLOCK mode for COPY/ZEROPAGE In case mmap_lock is contended, it is possible that userspace can spend time performing other tasks rather than waiting in uninterruptible-sleep state for the lock to become available. Even if no other task is available, it is better to yield or sleep rather than adding contention to already contended lock. We introduce MMAP_TRYLOCK mode so that when possible, userspace can request to use mmap_read_trylock(), returning -EAGAIN if and when it fails. 
Bug: 320478828 Change-Id: I2d196fd317e054af03dbd35ac1b0c7634cb370dc Signed-off-by: Lokesh Gidra --- fs/userfaultfd.c | 9 ++++++--- include/linux/userfaultfd_k.h | 8 +++++--- include/uapi/linux/userfaultfd.h | 2 ++ mm/userfaultfd.c | 13 +++++++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b4c24753ec19..f8e138d903a2 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1764,7 +1764,9 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, ret = -EINVAL; if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src) goto out; - if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP)) + if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE| + UFFDIO_COPY_MODE_WP| + UFFDIO_COPY_MODE_MMAP_TRYLOCK)) goto out; if (mmget_not_zero(ctx->mm)) { ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, @@ -1815,13 +1817,14 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, if (ret) goto out; ret = -EINVAL; - if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE) + if (uffdio_zeropage.mode & ~(UFFDIO_ZEROPAGE_MODE_DONTWAKE| + UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK)) goto out; if (mmget_not_zero(ctx->mm)) { ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len, - &ctx->mmap_changing); + &ctx->mmap_changing, uffdio_zeropage.mode); mmput(ctx->mm); } else { return -ESRCH; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 8ea2827a4eba..14fdc2dd1532 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -33,6 +33,9 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +static_assert(UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK == UFFDIO_COPY_MODE_MMAP_TRYLOCK); +#define UFFDIO_MODE_MMAP_TRYLOCK UFFDIO_COPY_MODE_MMAP_TRYLOCK + extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); @@ -65,9 
+68,8 @@ extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, atomic_t *mmap_changing, __u64 mode); extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, - unsigned long dst_start, - unsigned long len, - atomic_t *mmap_changing); + unsigned long dst_start, unsigned long len, + atomic_t *mmap_changing, __u64 mode); extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, atomic_t *mmap_changing); extern int mwriteprotect_range(struct mm_struct *dst_mm, diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 05b31d60acf6..a13fa043c092 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -237,6 +237,7 @@ struct uffdio_copy { * according to the uffdio_register.ioctls. */ #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) +#define UFFDIO_COPY_MODE_MMAP_TRYLOCK ((__u64)1<<63) __u64 mode; /* @@ -249,6 +250,7 @@ struct uffdio_copy { struct uffdio_zeropage { struct uffdio_range range; #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK ((__u64)1<<63) __u64 mode; /* diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index d4175821dd29..522d88bd73ef 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -519,14 +519,19 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, copied = 0; page = NULL; retry: - mmap_read_lock(dst_mm); + err = -EAGAIN; + if (mode & UFFDIO_MODE_MMAP_TRYLOCK) { + if (!mmap_read_trylock(dst_mm)) + goto out; + } else { + mmap_read_lock(dst_mm); + } /* * If memory mappings are changing because of non-cooperative * operation (e.g. 
mremap) running in parallel, bail out and * request the user to retry later */ - err = -EAGAIN; if (mmap_changing && atomic_read(mmap_changing)) goto out_unlock; @@ -668,10 +673,10 @@ ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, } ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, - unsigned long len, atomic_t *mmap_changing) + unsigned long len, atomic_t *mmap_changing, __u64 mode) { return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE, - mmap_changing, 0); + mmap_changing, mode); } ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, From 96305e30e970c4676b89cfebd671934485107716 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Fri, 8 Mar 2024 10:59:56 -0800 Subject: [PATCH 29/98] ANDROID: userfaultfd: abort uffdio ops if mmap_lock is contended Check if the mmap_lock is contended when looping over the pages that are requested to be filled. When it is observed, we rely on the already existing mechanism to return bytes copied/filled and -EAGAIN as error. This helps by avoiding contention of mmap_lock for long running userfaultfd operations. The userspace can perform other tasks before retrying the operation for the remaining pages. Bug: 320478828 Change-Id: I6d485fd03c96a826956ee3962e58058be3cf81c1 Signed-off-by: Lokesh Gidra --- mm/userfaultfd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 522d88bd73ef..70e54462bbf2 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -624,6 +624,15 @@ retry: if (unlikely(err == -ENOENT)) { void *page_kaddr; + /* + * Return early due to mmap_lock contention only after + * some pages are copied to ensure that jank sensitive + * threads don't keep retrying for progress-critical + * pages. 
+ */ + if (copied && mmap_lock_is_contended(dst_mm)) + break; + mmap_read_unlock(dst_mm); BUG_ON(!page); @@ -648,6 +657,9 @@ retry: if (fatal_signal_pending(current)) err = -EINTR; + + if (mmap_lock_is_contended(dst_mm)) + err = -EAGAIN; } if (err) break; From 537e133918a2cd44b0d85190a915c04f2f5c54fd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Dec 2022 15:28:49 +0100 Subject: [PATCH 30/98] UPSTREAM: arm64: Apply dynamic shadow call stack patching in two passes Code patching for the dynamically enabled shadow call stack comes down to finding PACIASP and AUTIASP instructions -which behave as NOPs on cores that do not implement pointer authentication- and converting them into shadow call stack pushes and pops, respectively. Due to past bad experiences with the highly complex and overengineered DWARF standard that describes the unwind metadata that we are using to locate these instructions, let's make this patching logic a little bit more robust so that any issues with the unwind metadata detected at boot time can be dealt with gracefully. The DWARF annotations that are used for this are emitted at function granularity, and due to the fact that the instructions we are patching will simply behave as NOPs if left unpatched, we can abort on errors as long as we don't leave any functions in a half-patched state. So do a dry run of each FDE frame (covering a single function) before performing the actual patching, and give up if the DWARF metadata cannot be understood. 
Change-Id: Iea167b37a4d84e2b444189c7af939cf58d6dc9cf Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20221213142849.1629026-1-ardb@kernel.org Signed-off-by: Catalin Marinas (cherry picked from commit 54c968bec344b101ba3596f2544f0f3b4c1eef2f) Signed-off-by: Mark-PK Tsai --- arch/arm64/kernel/patch-scs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/patch-scs.c index 1b3da02d5b74..a1fe4b4ff591 100644 --- a/arch/arm64/kernel/patch-scs.c +++ b/arch/arm64/kernel/patch-scs.c @@ -130,7 +130,8 @@ struct eh_frame { static int noinstr scs_handle_fde_frame(const struct eh_frame *frame, bool fde_has_augmentation_data, - int code_alignment_factor) + int code_alignment_factor, + bool dry_run) { int size = frame->size - offsetof(struct eh_frame, opcodes) + 4; u64 loc = (u64)offset_to_ptr(&frame->initial_loc); @@ -184,7 +185,8 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame, break; case DW_CFA_negate_ra_state: - scs_patch_loc(loc - 4); + if (!dry_run) + scs_patch_loc(loc - 4); break; case 0x40 ... 0x7f: @@ -235,9 +237,12 @@ int noinstr scs_patch(const u8 eh_frame[], int size) } else { ret = scs_handle_fde_frame(frame, fde_has_augmentation_data, - code_alignment_factor); + code_alignment_factor, + true); if (ret) return ret; + scs_handle_fde_frame(frame, fde_has_augmentation_data, + code_alignment_factor, false); } p += sizeof(frame->size) + frame->size; From 865e6d9df1de5f0e55cf4a40bd8d2f922dbee8c3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:22:18 +0200 Subject: [PATCH 31/98] UPSTREAM: netfilter: nf_tables: disallow timeout for anonymous sets commit e26d3009efda338f19016df4175f354a9bd0a4ab upstream. Never used from userspace, disallow these parameters. 
Bug: 329205828 Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b7be6c737a179a76901c872f6b4c1d00552d9a1b) Signed-off-by: Lee Jones Change-Id: I3d8358a6dee3246e3ac56697dbb2be8fdc5f716f --- net/netfilter/nf_tables_api.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 715be2edc542..087cbccaa48a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4671,6 +4671,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4679,6 +4682,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } From 5854f4c2aff0f7975b9fdaaf034b550dcd296626 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Mon, 18 Mar 2024 11:05:06 +0000 Subject: [PATCH 32/98] ANDROID: KVM: arm64: Fix missing trace event for nVHE dyn HVCs The hyp event host_hcall was missing when a custom HVC runs. 
Bug: 278749606 Bug: 244543039 Bug: 244373730 Signed-off-by: Vincent Donnefort (cherry picked from https://android-review.googlesource.com/q/commit:a1836ffbea9fcb70fa9d49af7382b9343285036f) Merged-In: I760cab4fbd36a13ad262842880d9ec484f23fd22 Change-Id: I760cab4fbd36a13ad262842880d9ec484f23fd22 --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 995c3a840010..87ca8cd735ea 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -1334,7 +1334,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) hcall_t hfn; if (handle_host_dynamic_hcall(host_ctxt) == HCALL_HANDLED) - return; + goto end; /* * If pKVM has been initialised then reject any calls to the @@ -1359,7 +1359,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; hfn(host_ctxt); - +end: trace_host_hcall(id, 0); return; From ec86765bae82a842d58839fcdb6a70dfb310441f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Mar 2024 15:32:08 +0000 Subject: [PATCH 33/98] ANDROID: KVM: arm64: Fix TLB invalidation when coalescing into a block When coalescing a table into a block, the break-before-make sequence must invalidate the whole range of addresses translated by the entry in order to avoid the possibility of a TLB conflict. Fix the coalescing post-table walker so that the whole range of the old table is invalidated, rather than just the first address, since a refcount of 1 on the child page is not sufficient to ensure the absence of any valid mappings. 
Cc: Sebastian Ene Reported-by: Mostafa Saleh Fixes: 6b3810205304 ("ANDROID: KVM: arm64: Coalesce host stage2 entries on ownership reclaim") Bug: 331232642 Signed-off-by: Will Deacon Change-Id: I4c94f552e4385599ad88b1be50b69ffbafa64a9b --- arch/arm64/kvm/hyp/pgtable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 05b3645d09e8..d3bd18c4dca4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -885,7 +885,9 @@ static void stage2_coalesce_walk_table_post(u64 addr, u64 end, u32 level, * of the page table page. */ if (mm_ops->page_count(childp) == 1) { - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + kvm_clear_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); + mm_ops->put_page(ptep); mm_ops->put_page(childp); } } From d154026d33f2eeceae73e82c7e8a1c83dcc28b97 Mon Sep 17 00:00:00 2001 From: Vilas Bhat Date: Thu, 28 Mar 2024 19:59:00 -0700 Subject: [PATCH 34/98] ANDROID: GKI: Update the ABI symbol list Update the pixel_watch symbol list. 
6 function symbol(s) added __module_get emergency_restart kernel_restart watchdog_init_timeout watchdog_register_device watchdog_unregister_device Bug: 329913683 Change-Id: I7bce78be9c642c7fea483aab25b1eb6ce15a232d Signed-off-by: Vilas Bhat --- android/abi_gki_aarch64_pixel_watch | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index f60a3092e140..a2877e136e4c 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -733,6 +733,7 @@ dump_stack __dynamic_dev_dbg __dynamic_pr_debug + emergency_restart enable_irq enable_percpu_irq ether_setup @@ -1092,6 +1093,7 @@ kernel_getsockname kernel_kobj kernel_recvmsg + kernel_restart kernel_sendmsg kernfs_find_and_get_ns kernfs_notify @@ -1272,6 +1274,7 @@ mod_node_page_state mod_timer mod_timer_pending + __module_get module_layout module_put __msecs_to_jiffies @@ -2457,6 +2460,9 @@ wakeup_source_unregister __wake_up_sync __warn_printk + watchdog_init_timeout + watchdog_register_device + watchdog_unregister_device wireless_nlevent_flush wireless_send_event woken_wake_function From f600c62d2552dba12611ba21fda137c094072eff Mon Sep 17 00:00:00 2001 From: "qinglin.li" Date: Fri, 29 Mar 2024 10:07:28 +0800 Subject: [PATCH 35/98] ANDROID: GKI: Update symbol list for Amlogic 12 function symbol(s) added 'struct backing_dev_info* bdi_alloc(int)' 'void bdi_put(struct backing_dev_info*)' 'int bdi_register(struct backing_dev_info*, const char*, ...)' 'void crypto_unregister_ahashes(struct ahash_alg*, int)' 'void deactivate_locked_super(struct super_block*)' 'loff_t fixed_size_llseek(struct file*, loff_t, int, loff_t)' 'void generic_shutdown_super(struct super_block*)' 'struct gpio_desc* gpiod_get_index_optional(struct device*, const char*, unsigned int, enum gpiod_flags)' 'int lookup_bdev(const char*, dev_t*)' 'struct nvmem_device* nvmem_register(const struct nvmem_config*)' 'void nvmem_unregister(struct nvmem_device*)' 
'struct super_block* sget_fc(struct fs_context*, int(*)(struct super_block*, struct fs_context*), int(*)(struct super_block*, struct fs_context*))' Bug: 331874739 Change-Id: Icaa620cd09b0cccb3a1075c1f0429355d90103f4 Signed-off-by: Qinglin Li --- android/abi_gki_aarch64.stg | 169 ++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_amlogic | 68 ++++++++++--- 2 files changed, 224 insertions(+), 13 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 68e2b49e33af..ea57d83b30e8 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -263850,6 +263850,12 @@ function { return_type_id: 0x48b5725f parameter_id: 0x0bf6beaf } +function { + id: 0x12663997 + return_type_id: 0x48b5725f + parameter_id: 0x0d8bad22 + parameter_id: 0x6720d32f +} function { id: 0x1267759f return_type_id: 0x48b5725f @@ -268413,6 +268419,11 @@ function { parameter_id: 0x1259e377 parameter_id: 0xe276adef } +function { + id: 0x1aa56a0d + return_type_id: 0x48b5725f + parameter_id: 0x28f4ee78 +} function { id: 0x1aab662c return_type_id: 0x48b5725f @@ -272566,6 +272577,14 @@ function { parameter_id: 0x1d19a9d5 parameter_id: 0x94ed3026 } +function { + id: 0x2d6430a1 + return_type_id: 0x27a7c613 + parameter_id: 0x18ea6ae3 + parameter_id: 0x27a7c613 + parameter_id: 0x6720d32f + parameter_id: 0x27a7c613 +} function { id: 0x2d64ae3e return_type_id: 0x27a7c613 @@ -272619,6 +272638,11 @@ function { parameter_id: 0x3760766d parameter_id: 0x3c38843b } +function { + id: 0x2eb1a24e + return_type_id: 0x145f7b00 + parameter_id: 0x3ddc5dec +} function { id: 0x2f31eea6 return_type_id: 0xa8fff47c @@ -274279,6 +274303,11 @@ function { parameter_id: 0x1dce0fdd parameter_id: 0x6d7f5ff6 } +function { + id: 0x5120d3a0 + return_type_id: 0x28f4ee78 + parameter_id: 0x6720d32f +} function { id: 0x513ab761 return_type_id: 0x92233392 @@ -281173,6 +281202,12 @@ function { parameter_id: 0x6720d32f parameter_id: 0x295c7202 } +function { + id: 0x92c581e2 + return_type_id: 
0x6720d32f + parameter_id: 0x3e10b518 + parameter_id: 0x3053e2de +} function { id: 0x92c58e2b return_type_id: 0x6720d32f @@ -284880,6 +284915,13 @@ function { parameter_id: 0x2dde023d parameter_id: 0x6720d32f } +function { + id: 0x976cd0e7 + return_type_id: 0x6720d32f + parameter_id: 0x28f4ee78 + parameter_id: 0x3e10b518 + parameter_id: 0xa52a0930 +} function { id: 0x977041a6 return_type_id: 0x6720d32f @@ -300509,6 +300551,13 @@ function { parameter_id: 0x38d23361 parameter_id: 0x3ea31487 } +function { + id: 0xf07462c6 + return_type_id: 0x26ee682a + parameter_id: 0x2f5fce77 + parameter_id: 0x2fb1b820 + parameter_id: 0x2fb1b820 +} function { id: 0xf078e232 return_type_id: 0x6d7f5ff6 @@ -313042,6 +313091,33 @@ elf_symbol { type_id: 0x915d9e6f full_name: "bdevname" } +elf_symbol { + id: 0x18c171b8 + name: "bdi_alloc" + is_defined: true + symbol_type: FUNCTION + crc: 0xad31d036 + type_id: 0x5120d3a0 + full_name: "bdi_alloc" +} +elf_symbol { + id: 0x8fb4b5f6 + name: "bdi_put" + is_defined: true + symbol_type: FUNCTION + crc: 0xe1b928fd + type_id: 0x1aa56a0d + full_name: "bdi_put" +} +elf_symbol { + id: 0x6bb4e80d + name: "bdi_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x435e0782 + type_id: 0x976cd0e7 + full_name: "bdi_register" +} elf_symbol { id: 0xcdba3a55 name: "bgpio_init" @@ -317803,6 +317879,15 @@ elf_symbol { type_id: 0x13fabadb full_name: "crypto_unregister_ahash" } +elf_symbol { + id: 0xa9d18a36 + name: "crypto_unregister_ahashes" + is_defined: true + symbol_type: FUNCTION + crc: 0xf36b1210 + type_id: 0x12663997 + full_name: "crypto_unregister_ahashes" +} elf_symbol { id: 0x883df740 name: "crypto_unregister_alg" @@ -318067,6 +318152,15 @@ elf_symbol { type_id: 0xcebcf7b7 full_name: "dbs_update" } +elf_symbol { + id: 0x02f83230 + name: "deactivate_locked_super" + is_defined: true + symbol_type: FUNCTION + crc: 0xdc36d757 + type_id: 0x1923cb99 + full_name: "deactivate_locked_super" +} elf_symbol { id: 0x36a5b3df name: "deactivate_task" @@ 
-327684,6 +327778,15 @@ elf_symbol { type_id: 0x9cec28b8 full_name: "firmware_request_nowarn" } +elf_symbol { + id: 0xbbba9aad + name: "fixed_size_llseek" + is_defined: true + symbol_type: FUNCTION + crc: 0xad5d5d5c + type_id: 0x2d6430a1 + full_name: "fixed_size_llseek" +} elf_symbol { id: 0xd211b195 name: "flow_block_cb_setup_simple" @@ -328734,6 +328837,15 @@ elf_symbol { type_id: 0x12e4741f full_name: "generic_read_dir" } +elf_symbol { + id: 0x798f83da + name: "generic_shutdown_super" + is_defined: true + symbol_type: FUNCTION + crc: 0xf832643f + type_id: 0x1923cb99 + full_name: "generic_shutdown_super" +} elf_symbol { id: 0x542785af name: "generic_write_checks" @@ -329936,6 +330048,15 @@ elf_symbol { type_id: 0x5f3cfa16 full_name: "gpiod_get_index" } +elf_symbol { + id: 0x226f81df + name: "gpiod_get_index_optional" + is_defined: true + symbol_type: FUNCTION + crc: 0x316800a4 + type_id: 0x5f3cfa16 + full_name: "gpiod_get_index_optional" +} elf_symbol { id: 0x4950fc9e name: "gpiod_get_optional" @@ -335829,6 +335950,15 @@ elf_symbol { type_id: 0x11cf2aed full_name: "logfc" } +elf_symbol { + id: 0xad10cf0f + name: "lookup_bdev" + is_defined: true + symbol_type: FUNCTION + crc: 0x34c7cdbc + type_id: 0x92c581e2 + full_name: "lookup_bdev" +} elf_symbol { id: 0x5868181f name: "lookup_page_ext" @@ -338662,6 +338792,24 @@ elf_symbol { type_id: 0x99bc47d8 full_name: "nvmem_device_write" } +elf_symbol { + id: 0x65d6ca81 + name: "nvmem_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x9e85ac36 + type_id: 0x2eb1a24e + full_name: "nvmem_register" +} +elf_symbol { + id: 0x27081fa8 + name: "nvmem_unregister" + is_defined: true + symbol_type: FUNCTION + crc: 0xcc39c03e + type_id: 0x158f8f53 + full_name: "nvmem_unregister" +} elf_symbol { id: 0x458a15db name: "of_address_to_resource" @@ -349610,6 +349758,15 @@ elf_symbol { type_id: 0x9264424b full_name: "sg_zero_buffer" } +elf_symbol { + id: 0x12402fa7 + name: "sget_fc" + is_defined: true + symbol_type: FUNCTION + 
crc: 0x5c47772e + type_id: 0xf07462c6 + full_name: "sget_fc" +} elf_symbol { id: 0x84e1c494 name: "shash_free_singlespawn_instance" @@ -363342,6 +363499,9 @@ interface { symbol_id: 0xe860837e symbol_id: 0x76f82fb7 symbol_id: 0xc22bc12c + symbol_id: 0x18c171b8 + symbol_id: 0x8fb4b5f6 + symbol_id: 0x6bb4e80d symbol_id: 0xcdba3a55 symbol_id: 0x977cc973 symbol_id: 0x49201db3 @@ -363871,6 +364031,7 @@ interface { symbol_id: 0x77cf6687 symbol_id: 0x81600265 symbol_id: 0x57a4fa38 + symbol_id: 0xa9d18a36 symbol_id: 0x883df740 symbol_id: 0xeb9b8f1f symbol_id: 0x4d669c6d @@ -363900,6 +364061,7 @@ interface { symbol_id: 0xd8f17b20 symbol_id: 0xf94e1278 symbol_id: 0xf1e399f5 + symbol_id: 0x02f83230 symbol_id: 0x36a5b3df symbol_id: 0x7225f00d symbol_id: 0xe2150034 @@ -364968,6 +365130,7 @@ interface { symbol_id: 0x40cf5b57 symbol_id: 0xf882020f symbol_id: 0x24954a6b + symbol_id: 0xbbba9aad symbol_id: 0xd211b195 symbol_id: 0xba7efe3b symbol_id: 0x6e6c429b @@ -365084,6 +365247,7 @@ interface { symbol_id: 0x7f639ef1 symbol_id: 0x6bd69c06 symbol_id: 0x4c4073c3 + symbol_id: 0x798f83da symbol_id: 0x542785af symbol_id: 0xb45dfa4f symbol_id: 0x86da67c0 @@ -365217,6 +365381,7 @@ interface { symbol_id: 0x80497778 symbol_id: 0xb92a798d symbol_id: 0xaca51db1 + symbol_id: 0x226f81df symbol_id: 0x4950fc9e symbol_id: 0x74240b4d symbol_id: 0xe4150b00 @@ -365870,6 +366035,7 @@ interface { symbol_id: 0xc41c43d2 symbol_id: 0x02bffe2a symbol_id: 0x06e891b4 + symbol_id: 0xad10cf0f symbol_id: 0x5868181f symbol_id: 0x493ce9fc symbol_id: 0x531bbd3e @@ -366184,6 +366350,8 @@ interface { symbol_id: 0x23113228 symbol_id: 0xc1013b0e symbol_id: 0x28ba8e50 + symbol_id: 0x65d6ca81 + symbol_id: 0x27081fa8 symbol_id: 0x458a15db symbol_id: 0x058addcb symbol_id: 0x93579cdd @@ -367400,6 +367568,7 @@ interface { symbol_id: 0x7c74f0ec symbol_id: 0x8525915d symbol_id: 0x4c3efe30 + symbol_id: 0x12402fa7 symbol_id: 0x84e1c494 symbol_id: 0xc2e7cb56 symbol_id: 0x7f3b48aa diff --git a/android/abi_gki_aarch64_amlogic 
b/android/abi_gki_aarch64_amlogic index 3fb321828154..017df1429251 100644 --- a/android/abi_gki_aarch64_amlogic +++ b/android/abi_gki_aarch64_amlogic @@ -28,6 +28,7 @@ arm64_use_ng_mappings __arm_smccc_hvc __arm_smccc_smc + async_schedule_node atomic_notifier_call_chain atomic_notifier_chain_register atomic_notifier_chain_unregister @@ -35,6 +36,9 @@ backlight_device_unregister balance_dirty_pages_ratelimited bcmp + bdi_alloc + bdi_put + bdi_register bio_add_page bio_alloc_bioset bio_associate_blkg @@ -82,8 +86,8 @@ bpf_master_redirect_enabled_key bpf_prog_put bpf_stats_enabled_key - bpf_trace_run10 bpf_trace_run1 + bpf_trace_run10 bpf_trace_run2 bpf_trace_run3 bpf_trace_run4 @@ -91,6 +95,7 @@ bpf_trace_run6 bpf_trace_run7 bpf_trace_run8 + bpf_trace_run9 bpf_warn_invalid_xdp_action __bread_gfp __brelse @@ -256,6 +261,7 @@ crypto_skcipher_encrypt crypto_skcipher_setkey crypto_unregister_ahash + crypto_unregister_ahashes crypto_unregister_shash crypto_unregister_skcipher __crypto_xor @@ -265,6 +271,7 @@ _ctype current_time current_umask + deactivate_locked_super debugfs_attr_read debugfs_attr_write debugfs_create_blob @@ -585,6 +592,7 @@ drm_connector_cleanup drm_connector_init drm_connector_list_iter_begin + drm_connector_list_iter_end drm_connector_list_iter_next drm_connector_set_vrr_capable_property drm_connector_unregister @@ -695,6 +703,7 @@ drm_state_dump drm_universal_plane_init drm_vblank_init + drm_wait_one_vblank drm_writeback_connector_init drm_writeback_queue_job drm_writeback_signal_completion @@ -728,9 +737,9 @@ extcon_get_extcon_dev extcon_get_state extcon_register_notifier - extcon_unregister_notifier extcon_set_state extcon_set_state_sync + extcon_unregister_notifier fasync_helper fault_in_iov_iter_readable __fdget @@ -755,6 +764,7 @@ find_vm_area find_vpid finish_wait + fixed_size_llseek flow_block_cb_setup_simple flow_rule_match_basic flow_rule_match_ipv4_addrs @@ -808,6 +818,7 @@ generic_handle_irq generic_permission generic_read_dir + 
generic_shutdown_super generic_write_checks generic_write_end genlmsg_multicast_allns @@ -819,13 +830,13 @@ genphy_handle_interrupt_no_ack genphy_read_abilities genphy_read_mmd_unsupported - genphy_write_mmd_unsupported genphy_read_status genphy_restart_aneg genphy_resume genphy_soft_reset genphy_suspend genphy_update_link + genphy_write_mmd_unsupported gen_pool_add_owner gen_pool_alloc_algo_owner gen_pool_avail @@ -873,6 +884,7 @@ gpiod_direction_output_raw gpiod_get gpiod_get_index + gpiod_get_index_optional gpiod_get_optional gpiod_get_raw_value gpiod_get_raw_value_cansleep @@ -1034,8 +1046,8 @@ iwe_stream_add_event iwe_stream_add_point iwe_stream_add_value - jiffies_64 jiffies + jiffies_64 jiffies_to_msecs jiffies_to_usecs kasan_flag_enabled @@ -1144,12 +1156,18 @@ __lock_page lockref_get logfc + log_post_read_mmio + log_post_write_mmio + log_read_mmio + log_write_mmio + lookup_bdev loops_per_jiffy mac_pton make_bad_inode mangle_path mark_buffer_dirty __mark_inode_dirty + match_string mbox_chan_received_data mbox_chan_txdone mbox_controller_register @@ -1161,11 +1179,11 @@ mdiobus_alloc_size mdiobus_free mdiobus_get_phy - mdiobus_read __mdiobus_read + mdiobus_read mdiobus_unregister - mdiobus_write __mdiobus_write + mdiobus_write mdio_device_create mdio_device_free media_create_pad_link @@ -1184,8 +1202,8 @@ memparse memremap mem_section - memset64 memset + memset64 __memset_io memstart_addr memunmap @@ -1204,8 +1222,8 @@ mmc_cqe_request_done mmc_detect_change mmc_free_host - mmc_gpio_get_cd mmc_gpiod_request_cd + mmc_gpio_get_cd mmc_of_parse mmc_regulator_get_supply mmc_regulator_set_ocr @@ -1223,6 +1241,7 @@ mod_delayed_work_on mod_node_page_state mod_timer + __module_get module_layout module_put mpage_readahead @@ -1245,6 +1264,7 @@ napi_gro_receive __napi_schedule napi_schedule_prep + __ndelay netdev_alert __netdev_alloc_skb netdev_err @@ -1285,8 +1305,8 @@ nla_find nla_memcpy __nla_parse - nla_put_64bit nla_put + nla_put_64bit nla_put_nohdr nla_reserve 
nla_strscpy @@ -1301,6 +1321,8 @@ ns_to_timespec64 __num_online_cpus nvmem_cell_read + nvmem_register + nvmem_unregister of_address_to_resource of_alias_get_id of_clk_add_provider @@ -1309,6 +1331,7 @@ of_clk_hw_onecell_get of_clk_set_defaults of_clk_src_onecell_get + of_count_phandle_with_args of_device_get_match_data of_device_is_available of_device_is_compatible @@ -1348,6 +1371,8 @@ of_match_node __of_mdiobus_register of_mdio_find_bus + of_n_addr_cells + of_n_size_cells of_parse_phandle of_parse_phandle_with_args of_phy_is_fixed_link @@ -1359,9 +1384,11 @@ of_property_read_string_helper of_property_read_u32_index of_property_read_u64 + of_property_read_u64_index of_property_read_variable_u16_array of_property_read_variable_u32_array of_property_read_variable_u8_array + of_prop_next_string of_prop_next_u32 of_pwm_xlate_with_flags of_reserved_mem_device_init_by_idx @@ -1441,6 +1468,7 @@ pci_unlock_rescan_remove pci_write_config_byte pci_write_config_dword + pcpu_nr_pages PDE_DATA __per_cpu_offset perf_trace_buf_alloc @@ -1478,8 +1506,8 @@ phylink_start phylink_stop phylink_suspend - phy_modify __phy_modify + phy_modify phy_modify_changed phy_modify_paged phy_modify_paged_changed @@ -1563,6 +1591,7 @@ __printk_ratelimit proc_create proc_create_data + proc_create_single_data proc_dointvec proc_get_parent_data proc_mkdir @@ -1803,10 +1832,12 @@ sg_alloc_table_from_pages_segment sg_copy_from_buffer sg_copy_to_buffer + sget_fc sg_free_table sg_init_one sg_init_table sg_nents + sg_nents_for_len sg_next __sg_page_iter_next __sg_page_iter_start @@ -1814,6 +1845,8 @@ sg_pcopy_to_buffer show_class_attr_string show_regs + si_mem_available + si_meminfo simple_attr_open simple_attr_read simple_attr_release @@ -1993,12 +2026,14 @@ sysctl_sched_latency sysfs_create_bin_file sysfs_create_file_ns + sysfs_create_files sysfs_create_group sysfs_create_link sysfs_emit __sysfs_match_string sysfs_remove_bin_file sysfs_remove_file_ns + sysfs_remove_files sysfs_remove_group 
sysfs_remove_link sysfs_streq @@ -2028,6 +2063,7 @@ time64_to_tm timespec64_to_jiffies _totalram_pages + touch_softlockup_watchdog trace_clock_local trace_event_buffer_commit trace_event_buffer_reserve @@ -2047,9 +2083,11 @@ __traceiter_android_rvh_place_entity __traceiter_android_rvh_replace_next_task_fair __traceiter_android_rvh_schedule + __traceiter_android_rvh_select_task_rq_fair __traceiter_android_rvh_select_task_rq_rt __traceiter_android_rvh_tick_entry __traceiter_android_vh_alloc_pages_entry + __traceiter_android_vh_alloc_pages_slowpath __traceiter_android_vh_calc_alloc_flags __traceiter_android_vh_cma_alloc_bypass __traceiter_android_vh_cma_drain_all_pages_bypass @@ -2097,9 +2135,11 @@ __tracepoint_android_rvh_place_entity __tracepoint_android_rvh_replace_next_task_fair __tracepoint_android_rvh_schedule + __tracepoint_android_rvh_select_task_rq_fair __tracepoint_android_rvh_select_task_rq_rt __tracepoint_android_rvh_tick_entry __tracepoint_android_vh_alloc_pages_entry + __tracepoint_android_vh_alloc_pages_slowpath __tracepoint_android_vh_calc_alloc_flags __tracepoint_android_vh_cma_alloc_bypass __tracepoint_android_vh_cma_drain_all_pages_bypass @@ -2209,16 +2249,16 @@ usb_autopm_get_interface usb_autopm_put_interface usb_control_msg - usb_create_hcd __usb_create_hcd + usb_create_hcd usb_debug_root usb_decode_ctrl usb_del_gadget usb_del_gadget_udc usb_deregister usb_deregister_dev - usb_disabled usb_disable_autosuspend + usb_disabled usb_driver_claim_interface usb_driver_release_interface usb_ep_set_maxpacket_limit @@ -2239,9 +2279,9 @@ usb_get_maximum_ssp_rate usb_get_role_switch_default_mode usb_hcd_check_unlink_urb - usb_hc_died usb_hcd_end_port_resume usb_hcd_giveback_urb + usb_hc_died usb_hcd_irq usb_hcd_is_primary_hcd usb_hcd_link_urb_to_ep @@ -2364,6 +2404,7 @@ __video_register_device video_unregister_device vmalloc + vmalloc_nr_pages vmalloc_to_page vmalloc_to_pfn vmalloc_user @@ -2371,6 +2412,7 @@ vm_event_states vmf_insert_pfn_prot vm_insert_page 
+ vm_node_stat vm_unmap_aliases vm_zone_stat vprintk From 3b3c1c80e8944f6439493be170b5fe2fb608818e Mon Sep 17 00:00:00 2001 From: Ben Fennema Date: Fri, 29 Mar 2024 12:45:37 -0700 Subject: [PATCH 36/98] ANDROID: GKI: Update the ABI symbol list Update the pixel_watch symbol list. Bug: 330275264 Change-Id: I843394f80d93a3f3d1a33846d1af4f189803b829 Signed-off-by: Ben Fennema --- android/abi_gki_aarch64_pixel_watch | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index a2877e136e4c..374639c8ee5b 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -1133,6 +1133,7 @@ kobject_uevent_env kobj_sysfs_ops krealloc + kset_create_and_add ksize ksoftirqd kstat @@ -2078,6 +2079,7 @@ sysfs_remove_link sysfs_remove_link_from_group sysfs_streq + sysfs_update_group sysrq_mask system_32bit_el0_cpumask system_freezable_wq From 668dfb812db727a038035a2386057811f5f08fb0 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 30 Mar 2024 19:01:14 +0000 Subject: [PATCH 37/98] FROMLIST: binder: check offset alignment in binder_get_object() Commit 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") introduced changes to how binder objects are copied. In doing so, it unintentionally removed an offset alignment check done through calls to binder_alloc_copy_from_buffer() -> check_buffer(). These calls were replaced in binder_get_object() with copy_from_user(), so now an explicit offset alignment check is needed here. This avoids later complications when unwinding the objects gets harder. It is worth noting this check existed prior to commit 7a67a39320df ("binder: add function to copy binder object from buffer"), likely removed due to redundancy at the time. 
Fixes: 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") Cc: Signed-off-by: Carlos Llamas Bug: 320661088 Link: https://lore.kernel.org/all/20240330190115.1877819-1-cmllamas@google.com/ Change-Id: Iaddabaa28de7ba7b7d35dbb639d38ca79dbc5077 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 57295e600607..29f6a1bd739c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1915,8 +1915,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; From 3de9177e8168658fe7a28296affc80974496a24a Mon Sep 17 00:00:00 2001 From: erinwang Date: Mon, 25 Mar 2024 10:32:19 +0800 Subject: [PATCH 38/98] ANDROID: GKI: Update symbol list for lenovo 2 function symbol(s) added 'unsigned long* devm_bitmap_zalloc(struct device*, unsigned int, gfp_t)' 'void tracing_on()' Bug: 331118893 Change-Id: I3baa632e1ebb7cb09d4651d656f0dcc6cda21cd3 Signed-off-by: erinwang --- android/abi_gki_aarch64.stg | 27 +++++++++++++++++++++++++++ android/abi_gki_aarch64_lenovo | 3 +++ 2 files changed, 30 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index ea57d83b30e8..7d1c73e8e26d 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -298157,6 +298157,13 @@ function { return_type_id: 0x31b5a66f parameter_id: 0x2668e644 } +function { + id: 0xaa7f8be4 + return_type_id: 0x064d6086 + parameter_id: 0x0258f96e + parameter_id: 0x4585663f + parameter_id: 0xf1a6dfed +} function { id: 0xaa8f5c2d return_type_id: 0xf435685e @@ -320168,6 +320175,15 @@ 
elf_symbol { type_id: 0x10cc1a70 full_name: "devm_backlight_device_unregister" } +elf_symbol { + id: 0xff3a9dde + name: "devm_bitmap_zalloc" + is_defined: true + symbol_type: FUNCTION + crc: 0x5e53ca9c + type_id: 0xaa7f8be4 + full_name: "devm_bitmap_zalloc" +} elf_symbol { id: 0x01a0cc1d name: "devm_blk_crypto_profile_init" @@ -355052,6 +355068,15 @@ elf_symbol { type_id: 0x10985193 full_name: "tracing_off" } +elf_symbol { + id: 0x6521b803 + name: "tracing_on" + is_defined: true + symbol_type: FUNCTION + crc: 0x10138352 + type_id: 0x10985193 + full_name: "tracing_on" +} elf_symbol { id: 0x3f07269b name: "truncate_inode_pages" @@ -364285,6 +364310,7 @@ interface { symbol_id: 0xa2a47944 symbol_id: 0x97ae66e9 symbol_id: 0x206986c6 + symbol_id: 0xff3a9dde symbol_id: 0x01a0cc1d symbol_id: 0x32439a1e symbol_id: 0x70cc5ef2 @@ -368156,6 +368182,7 @@ interface { symbol_id: 0x3df2f359 symbol_id: 0x33172d21 symbol_id: 0x54bbaa46 + symbol_id: 0x6521b803 symbol_id: 0x3f07269b symbol_id: 0x3c7c6ce9 symbol_id: 0x7a43283c diff --git a/android/abi_gki_aarch64_lenovo b/android/abi_gki_aarch64_lenovo index 9173bee8d6d5..7a8be12c77d4 100644 --- a/android/abi_gki_aarch64_lenovo +++ b/android/abi_gki_aarch64_lenovo @@ -257,6 +257,7 @@ _dev_info __dev_kfree_skb_any devm_add_action + devm_bitmap_zalloc devm_clk_get devm_free_irq devm_fwnode_gpiod_get_index @@ -1330,6 +1331,8 @@ __tracepoint_android_vh_use_amu_fie __tracepoint_binder_transaction_received __tracepoint_cpu_frequency_limits + tracing_off + tracing_on try_module_get __ubsan_handle_cfi_check_fail_abort __udelay From 6a45518094ac67ec227df2f823b0726f89aa7c46 Mon Sep 17 00:00:00 2001 From: "qinglin.li" Date: Wed, 3 Apr 2024 17:46:52 +0800 Subject: [PATCH 39/98] ANDROID: GKI: Update symbol list for Amlogic 1 function symbol(s) added 'int __traceiter_android_rvh_set_sugov_update(void*, struct sugov_policy*, unsigned int, bool*)' 1 variable symbol(s) added 'struct tracepoint __tracepoint_android_rvh_set_sugov_update' Bug: 332649159 
Change-Id: Id8efeb5dd38638e2b08a5f2bad5db6744cc0fb15 Signed-off-by: Qinglin Li --- android/abi_gki_aarch64.stg | 168 ++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_amlogic | 4 + 2 files changed, 172 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 7d1c73e8e26d..72278aa6623b 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -478,6 +478,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x2d64ae3e } +pointer_reference { + id: 0x01ce1d56 + kind: POINTER + pointee_type_id: 0x2d7893c6 +} pointer_reference { id: 0x01cecb1b kind: POINTER @@ -20298,6 +20303,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x95c98491 } +pointer_reference { + id: 0x2fe3ab52 + kind: POINTER + pointee_type_id: 0x95ce4bd4 +} pointer_reference { id: 0x2fe3d7ba kind: POINTER @@ -52644,6 +52654,12 @@ member { type_id: 0xc9082b19 offset: 64 } +member { + id: 0xd996d7d9 + name: "cached_raw_freq" + type_id: 0x4585663f + offset: 480 +} member { id: 0xd8c78590 name: "cached_refs" @@ -85332,6 +85348,12 @@ member { type_id: 0x92233392 offset: 1408 } +member { + id: 0x9a51f6de + name: "freq_update_delay_ns" + type_id: 0x2e0f9112 + offset: 384 +} member { id: 0x4cc7367d name: "freqm" @@ -101067,6 +101089,12 @@ member { type_id: 0xb95bf932 offset: 640 } +member { + id: 0xed01f807 + name: "irq_work" + type_id: 0xb95bf932 + offset: 512 +} member { id: 0xedbbc0ae name: "irq_work" @@ -104174,6 +104202,12 @@ member { type_id: 0xedf277ba offset: 9600 } +member { + id: 0x5fee299a + name: "last_freq_update_time" + type_id: 0x92233392 + offset: 320 +} member { id: 0x280cf0ef name: "last_func" @@ -106114,6 +106148,12 @@ member { type_id: 0x0eafcf90 offset: 384 } +member { + id: 0xa1f988ed + name: "limits_changed" + type_id: 0x6d7f5ff6 + offset: 1928 +} member { id: 0xdd5d93e6 name: "line" @@ -120936,6 +120976,12 @@ member { offset: 1360 bitsize: 1 } +member { + id: 0x89ec1f61 + name: "need_freq_update" + type_id: 0x6d7f5ff6 
+ offset: 1936 +} member { id: 0x2ddb9f52 name: "need_mb" @@ -121980,6 +122026,12 @@ member { type_id: 0x6720d32f offset: 1536 } +member { + id: 0x50805cf7 + name: "next_freq" + type_id: 0x4585663f + offset: 448 +} member { id: 0x4d6204ba name: "next_hash" @@ -144133,6 +144185,12 @@ member { type_id: 0x2d154530 offset: 2752 } +member { + id: 0xeb113bfe + name: "rate_limit_us" + type_id: 0x4585663f + offset: 1344 +} member { id: 0x4134711d name: "rate_list" @@ -173533,6 +173591,12 @@ member { type_id: 0x1d19a9d5 offset: 128 } +member { + id: 0xfc7d4753 + name: "thread" + type_id: 0x1d19a9d5 + offset: 1856 +} member { id: 0xfc7d4b89 name: "thread" @@ -177145,6 +177209,18 @@ member { type_id: 0x11d941b8 offset: 384 } +member { + id: 0x14692088 + name: "tunables" + type_id: 0x2fe3ab52 + offset: 64 +} +member { + id: 0x0c096620 + name: "tunables_hook" + type_id: 0xd3c80119 + offset: 128 +} member { id: 0x3203c9f3 name: "tuner" @@ -181378,6 +181454,12 @@ member { type_id: 0x0aee7ba0 offset: 896 } +member { + id: 0x550062ea + name: "update_lock" + type_id: 0xc8b17aa7 + offset: 256 +} member { id: 0x556f10f2 name: "update_lock" @@ -187998,6 +188080,12 @@ member { type_id: 0x3835dcc0 offset: 9600 } +member { + id: 0xd6c16cd0 + name: "work" + type_id: 0x3835dcc0 + offset: 704 +} member { id: 0xd6c16eed name: "work" @@ -188134,6 +188222,12 @@ member { type_id: 0x6d7f5ff6 offset: 1480 } +member { + id: 0x98b9adc1 + name: "work_in_progress" + type_id: 0x6d7f5ff6 + offset: 1920 +} member { id: 0x44ded5a9 name: "work_irq_change" @@ -188170,6 +188264,12 @@ member { type_id: 0xd3c80119 offset: 3072 } +member { + id: 0x4620b8c5 + name: "work_lock" + type_id: 0xa7c362b0 + offset: 1024 +} member { id: 0x46746957 name: "work_lock" @@ -188297,6 +188397,12 @@ member { type_id: 0x1d19a9d5 offset: 704 } +member { + id: 0xff970cdd + name: "worker" + type_id: 0xf87d4486 + offset: 1408 +} member { id: 0x3c046673 name: "worker_done" @@ -235490,6 +235596,40 @@ struct_union { member_id: 
0x866b1acd } } +struct_union { + id: 0x2d7893c6 + kind: STRUCT + name: "sugov_policy" + definition { + bytesize: 248 + member_id: 0x60e88f7d + member_id: 0x14692088 + member_id: 0x0c096620 + member_id: 0x550062ea + member_id: 0x5fee299a + member_id: 0x9a51f6de + member_id: 0x50805cf7 + member_id: 0xd996d7d9 + member_id: 0xed01f807 + member_id: 0xd6c16cd0 + member_id: 0x4620b8c5 + member_id: 0xff970cdd + member_id: 0xfc7d4753 + member_id: 0x98b9adc1 + member_id: 0xa1f988ed + member_id: 0x89ec1f61 + } +} +struct_union { + id: 0x95ce4bd4 + kind: STRUCT + name: "sugov_tunables" + definition { + bytesize: 176 + member_id: 0x2ae4a5b7 + member_id: 0xeb113bfe + } +} struct_union { id: 0xb1f94634 kind: STRUCT @@ -290900,6 +291040,14 @@ function { parameter_id: 0x15c389f6 parameter_id: 0xd25db1d3 } +function { + id: 0x9bb9a470 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x01ce1d56 + parameter_id: 0x4585663f + parameter_id: 0x11cfee5a +} function { id: 0x9bba5387 return_type_id: 0x6720d32f @@ -305692,6 +305840,15 @@ elf_symbol { type_id: 0x9b745c5a full_name: "__traceiter_android_rvh_set_skip_swapcache_flags" } +elf_symbol { + id: 0xdbc1c244 + name: "__traceiter_android_rvh_set_sugov_update" + is_defined: true + symbol_type: FUNCTION + crc: 0x4b0174aa + type_id: 0x9bb9a470 + full_name: "__traceiter_android_rvh_set_sugov_update" +} elf_symbol { id: 0xc6a28b4a name: "__traceiter_android_rvh_set_task_cpu" @@ -308959,6 +309116,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_set_skip_swapcache_flags" } +elf_symbol { + id: 0xe665d9e6 + name: "__tracepoint_android_rvh_set_sugov_update" + is_defined: true + symbol_type: OBJECT + crc: 0xfaf475a5 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_set_sugov_update" +} elf_symbol { id: 0xc5049f7c name: "__tracepoint_android_rvh_set_task_cpu" @@ -362701,6 +362867,7 @@ interface { symbol_id: 0x9c2c2d71 symbol_id: 0x615c3dcf symbol_id: 0x96033ccd + symbol_id: 0xdbc1c244 
symbol_id: 0xc6a28b4a symbol_id: 0x9b0cc890 symbol_id: 0x559e0725 @@ -363064,6 +363231,7 @@ interface { symbol_id: 0xbe9f9d4f symbol_id: 0xde470f79 symbol_id: 0xa88f0d7b + symbol_id: 0xe665d9e6 symbol_id: 0xc5049f7c symbol_id: 0x42fff08e symbol_id: 0x74f29f73 diff --git a/android/abi_gki_aarch64_amlogic b/android/abi_gki_aarch64_amlogic index 017df1429251..a201089a1294 100644 --- a/android/abi_gki_aarch64_amlogic +++ b/android/abi_gki_aarch64_amlogic @@ -199,6 +199,8 @@ cpu_all_bits cpu_bit_bitmap cpufreq_boost_enabled + cpufreq_cpu_get + cpufreq_cpu_put cpufreq_generic_attr cpufreq_generic_frequency_table_verify cpufreq_generic_suspend @@ -2085,6 +2087,7 @@ __traceiter_android_rvh_schedule __traceiter_android_rvh_select_task_rq_fair __traceiter_android_rvh_select_task_rq_rt + __traceiter_android_rvh_set_sugov_update __traceiter_android_rvh_tick_entry __traceiter_android_vh_alloc_pages_entry __traceiter_android_vh_alloc_pages_slowpath @@ -2137,6 +2140,7 @@ __tracepoint_android_rvh_schedule __tracepoint_android_rvh_select_task_rq_fair __tracepoint_android_rvh_select_task_rq_rt + __tracepoint_android_rvh_set_sugov_update __tracepoint_android_rvh_tick_entry __tracepoint_android_vh_alloc_pages_entry __tracepoint_android_vh_alloc_pages_slowpath From 948f42ca2bc502a042a545ee8a900956f60c19ec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Mar 2024 10:02:41 +0100 Subject: [PATCH 40/98] UPSTREAM: netfilter: nft_set_pipapo: release elements in clone only from destroy path [ Upstream commit b0e256f3dd2ba6532f37c5c22e07cb07a36031ee ] Clone already always provides a current view of the lookup table, use it to destroy the set, otherwise it is possible to destroy elements twice. This fix requires: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") which came after: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path"). 
Bug: 330876672 Fixes: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (cherry picked from commit ff90050771412b91e928093ccd8736ae680063c2) Signed-off-by: Lee Jones Change-Id: I8c0811e69f82681c7fcfdca1111f1702e27bb80e --- net/netfilter/nft_set_pipapo.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4e1cc31729b8..050672ccfa7e 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2234,8 +2234,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(ctx, set, m); - #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); #endif @@ -2250,8 +2248,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->clone) { m = priv->clone; - if (priv->dirty) - nft_set_pipapo_match_destroy(ctx, set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); From cd4da4b748147ccf78714e8f5acaed36ac9490d9 Mon Sep 17 00:00:00 2001 From: Carlos Galo Date: Mon, 8 Apr 2024 18:29:10 +0000 Subject: [PATCH 41/98] Revert "FROMGIT: mm: update mark_victim tracepoints fields" This reverts commit b9e9a2c0094d43a2f05e230a3a6db3accd1ca60d. 
Reason for revert: b/331214192 Signed-off-by: Carlos Galo Change-Id: I5895d3b8a0577f7aa67a8fbab81991ced49f8eab --- include/trace/events/oom.h | 19 ++++--------------- mm/oom_kill.c | 6 +----- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 3c5941da8075..26a11e4a2c36 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -72,30 +72,19 @@ TRACE_EVENT(reclaim_retry_zone, ); TRACE_EVENT(mark_victim, - TP_PROTO(struct task_struct *task, uid_t uid), + TP_PROTO(int pid), - TP_ARGS(task, uid), + TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) - __field(uid_t, uid) - __string(comm, task->comm) - __field(short, oom_score_adj) ), TP_fast_assign( - __entry->pid = task->pid; - __entry->uid = uid; - __assign_str(comm, task->comm); - __entry->oom_score_adj = task->signal->oom_score_adj; + __entry->pid = pid; ), - TP_printk("pid=%d uid=%u comm=%s oom_score_adj=%hd", - __entry->pid, - __entry->uid, - __get_str(comm), - __entry->oom_score_adj - ) + TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(wake_reaper, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 212f5d6aca01..67946e2f50ea 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include "internal.h" @@ -729,7 +728,6 @@ static inline void queue_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { - const struct cred *cred; struct mm_struct *mm = tsk->mm; WARN_ON(oom_killer_disabled); @@ -751,9 +749,7 @@ static void mark_oom_victim(struct task_struct *tsk) */ __thaw_task(tsk); atomic_inc(&oom_victims); - cred = get_task_cred(tsk); - trace_mark_victim(tsk, cred->uid.val); - put_cred(cred); + trace_mark_victim(tsk->pid); } /** From 3507c287a681cab08d81e41df9618d36ed572118 Mon Sep 17 00:00:00 2001 From: Carlos Galo Date: Fri, 23 Feb 2024 17:32:49 +0000 Subject: [PATCH 42/98] UPSTREAM: mm: update mark_victim tracepoints fields 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of the mark_victim tracepoint provides only the process ID (pid) of the victim process. This limitation poses challenges for userspace tools requiring real-time OOM analysis and intervention. Although this information is available from the kernel logs, it’s not the appropriate format to provide OOM notifications. In Android, BPF programs are used with the mark_victim trace events to notify userspace of an OOM kill. For consistency, update the trace event to include the same information about the OOMed victim as the kernel logs. - UID In Android each installed application has a unique UID. Including the `uid` assists in correlating OOM events with specific apps. - Process Name (comm) Enables identification of the affected process. - OOM Score Will allow userspace to get additional insight of the relative kill priority of the OOM victim. In Android, the oom_score_adj is used to categorize app state (foreground, background, etc.), which aids in analyzing user-perceptible impacts of OOM events [1]. - Total VM, RSS Stats, and pgtables Amount of memory used by the victim that will, potentially, be freed up by killing it. 
[1] https://cs.android.com/android/platform/superproject/main/+/246dc8fc95b6d93afcba5c6d6c133307abb3ac2e:frameworks/base/services/core/java/com/android/server/am/ProcessList.java;l=188-283 Signed-off-by: Carlos Galo Reviewed-by: Steven Rostedt Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Bug: 331214192 (cherry picked from commit 72ba14deb40a9e9668ec5e66a341ed657e5215c2) Link: https://lore.kernel.org/all/20240223173258.174828-1-carlosgalo@google.com/ Change-Id: I24f503ceca04b83f8abf42fcd04a3409e17be6b5 --- include/trace/events/oom.h | 36 ++++++++++++++++++++++++++++++++---- mm/oom_kill.c | 6 +++++- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 26a11e4a2c36..b799f3bcba82 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -7,6 +7,8 @@ #include #include +#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10)) + TRACE_EVENT(oom_score_adj_update, TP_PROTO(struct task_struct *task), @@ -72,19 +74,45 @@ TRACE_EVENT(reclaim_retry_zone, ); TRACE_EVENT(mark_victim, - TP_PROTO(int pid), + TP_PROTO(struct task_struct *task, uid_t uid), - TP_ARGS(pid), + TP_ARGS(task, uid), TP_STRUCT__entry( __field(int, pid) + __string(comm, task->comm) + __field(unsigned long, total_vm) + __field(unsigned long, anon_rss) + __field(unsigned long, file_rss) + __field(unsigned long, shmem_rss) + __field(uid_t, uid) + __field(unsigned long, pgtables) + __field(short, oom_score_adj) ), TP_fast_assign( - __entry->pid = pid; + __entry->pid = task->pid; + __assign_str(comm, task->comm); + __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm); + __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES)); + __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES)); + __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES)); + __entry->uid = uid; + __entry->pgtables = 
mm_pgtables_bytes(task->mm) >> 10; + __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d", __entry->pid) + TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd", + __entry->pid, + __get_str(comm), + __entry->total_vm, + __entry->anon_rss, + __entry->file_rss, + __entry->shmem_rss, + __entry->uid, + __entry->pgtables, + __entry->oom_score_adj + ) ); TRACE_EVENT(wake_reaper, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 67946e2f50ea..212f5d6aca01 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "internal.h" @@ -728,6 +729,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { + const struct cred *cred; struct mm_struct *mm = tsk->mm; WARN_ON(oom_killer_disabled); @@ -749,7 +751,9 @@ static void mark_oom_victim(struct task_struct *tsk) */ __thaw_task(tsk); atomic_inc(&oom_victims); - trace_mark_victim(tsk->pid); + cred = get_task_cred(tsk); + trace_mark_victim(tsk, cred->uid.val); + put_cred(cred); } /** From 19cbe316423133e087dbe89b6d3fac2858d683e6 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:37 -0700 Subject: [PATCH 43/98] UPSTREAM: HID: playstation: expose DualSense lightbar through a multi-color LED. The DualSense lightbar has so far been supported, but it was not yet adjustable from user space. This patch exposes it through a multi-color LED. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit fc97b4d6a1a6d418fd4053fd7716eca746fdd163) Change-Id: I48204113da804b13ad5bed2f651a5826ab5a86f7 Signed-off-by: Farid Chahla (cherry picked from commit 392b327fe02113aaaa332ca4cf06e4edb36f5566) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 72 +++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 944e5e5ff134..ba502522479a 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -38,6 +40,7 @@ struct ps_device { uint8_t battery_capacity; int battery_status; + const char *input_dev_name; /* Name of primary input device. */ uint8_t mac_address[6]; /* Note: stored in little endian order. */ uint32_t hw_version; uint32_t fw_version; @@ -147,6 +150,7 @@ struct dualsense { uint8_t motor_right; /* RGB lightbar */ + struct led_classdev_mc lightbar; bool update_lightbar; uint8_t lightbar_red; uint8_t lightbar_green; @@ -288,6 +292,8 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = { {0, 0}, }; +static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue); + /* * Add a new ps_device to ps_devices if it doesn't exist. * Return error on duplicate device, which can happen if the same @@ -525,6 +531,45 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu return 0; } +/* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. 
*/ +static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev, + int (*brightness_set)(struct led_classdev *, enum led_brightness)) +{ + struct hid_device *hdev = ps_dev->hdev; + struct mc_subled *mc_led_info; + struct led_classdev *led_cdev; + int ret; + + mc_led_info = devm_kmalloc_array(&hdev->dev, 3, sizeof(*mc_led_info), + GFP_KERNEL | __GFP_ZERO); + if (!mc_led_info) + return -ENOMEM; + + mc_led_info[0].color_index = LED_COLOR_ID_RED; + mc_led_info[1].color_index = LED_COLOR_ID_GREEN; + mc_led_info[2].color_index = LED_COLOR_ID_BLUE; + + lightbar_mc_dev->subled_info = mc_led_info; + lightbar_mc_dev->num_colors = 3; + + led_cdev = &lightbar_mc_dev->led_cdev; + led_cdev->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s:rgb:indicator", + ps_dev->input_dev_name); + if (!led_cdev->name) + return -ENOMEM; + led_cdev->brightness = 255; + led_cdev->max_brightness = 255; + led_cdev->brightness_set_blocking = brightness_set; + + ret = devm_led_classdev_multicolor_register(&hdev->dev, lightbar_mc_dev); + if (ret < 0) { + hid_err(hdev, "Cannot register multicolor LED device\n"); + return ret; + } + + return 0; +} + static struct input_dev *ps_sensors_create(struct hid_device *hdev, int accel_range, int accel_res, int gyro_range, int gyro_res) { @@ -793,6 +838,22 @@ err_free: return ret; } +static int dualsense_lightbar_set_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(cdev); + struct dualsense *ds = container_of(mc_cdev, struct dualsense, lightbar); + uint8_t red, green, blue; + + led_mc_calc_color_components(mc_cdev, brightness); + red = mc_cdev->subled_info[0].brightness; + green = mc_cdev->subled_info[1].brightness; + blue = mc_cdev->subled_info[2].brightness; + + dualsense_set_lightbar(ds, red, green, blue); + return 0; +} + static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, void *buf) { @@ -1138,10 
+1199,14 @@ static int dualsense_reset_leds(struct dualsense *ds) static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue) { + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); ds->update_lightbar = true; ds->lightbar_red = red; ds->lightbar_green = green; ds->lightbar_blue = blue; + spin_unlock_irqrestore(&ds->base.lock, flags); schedule_work(&ds->output_worker); } @@ -1228,6 +1293,8 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) ret = PTR_ERR(ds->gamepad); goto err; } + /* Use gamepad input device name as primary device name for e.g. LEDs */ + ps_dev->input_dev_name = dev_name(&ds->gamepad->dev); ds->sensors = ps_sensors_create(hdev, DS_ACC_RANGE, DS_ACC_RES_PER_G, DS_GYRO_RANGE, DS_GYRO_RES_PER_DEG_S); @@ -1255,6 +1322,11 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) if (ret) goto err; + ret = ps_lightbar_register(ps_dev, &ds->lightbar, dualsense_lightbar_set_brightness); + if (ret) + goto err; + + /* Set default lightbar color. */ dualsense_set_lightbar(ds, 0, 0, 128); /* blue */ ret = ps_device_set_player_id(ps_dev); From f011142fea046cdc7fef755153acd6c2672e00ca Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:38 -0700 Subject: [PATCH 44/98] UPSTREAM: leds: add new LED_FUNCTION_PLAYER for player LEDs for game controllers. Player LEDs are commonly found on game controllers from Nintendo and Sony to indicate a player ID across a number of LEDs. For example, "Player 2" might be indicated as "-x--" on a device with 4 LEDs where "x" means on. This patch introduces LED_FUNCTION_PLAYER1-5 defines to properly indicate player LEDs from the kernel. Until now there was no good standard, which resulted in inconsistent behavior across xpad, hid-sony, hid-wiimote and other drivers. Moving forward new drivers should use LED_FUNCTION_PLAYERx. 
Note: management of Player IDs is left to user space, though a kernel driver may pick a default value. Signed-off-by: Roderick Colenbrander Acked-by: Pavel Machek Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 61177c088a57bed259122f3c7bc6d61984936a12) Change-Id: Ie1de4d66304bb25fc2c9fcdb1ec9b7589ad9e7ac Signed-off-by: Farid Chahla (cherry picked from commit 8abc9ed234b1b10e4949720e056c294dab4552d7) Signed-off-by: Lee Jones --- Documentation/leds/well-known-leds.txt | 14 ++++++++++++++ include/dt-bindings/leds/common.h | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt index 4a8b9dc4bf52..2160382c86be 100644 --- a/Documentation/leds/well-known-leds.txt +++ b/Documentation/leds/well-known-leds.txt @@ -16,6 +16,20 @@ but then try the legacy ones, too. Notice there's a list of functions in include/dt-bindings/leds/common.h . +* Gamepads and joysticks + +Game controllers may feature LEDs to indicate a player number. This is commonly +used on game consoles in which multiple controllers can be connected to a system. +The "player LEDs" are then programmed with a pattern to indicate a particular +player. For example, a game controller with 4 LEDs, may be programmed with "x---" +to indicate player 1, "-x--" to indicate player 2 etcetera where "x" means on. +Input drivers can utilize the LED class to expose the individual player LEDs +of a game controller using the function "player". +Note: tracking and management of Player IDs is the responsibility of user space, +though drivers may pick a default value. 
+ +Good: "input*:*:player-{1,2,3,4,5} + * Keyboards Good: "input*:*:capslock" diff --git a/include/dt-bindings/leds/common.h b/include/dt-bindings/leds/common.h index 52b619d44ba2..3be89a7c20a9 100644 --- a/include/dt-bindings/leds/common.h +++ b/include/dt-bindings/leds/common.h @@ -60,6 +60,13 @@ #define LED_FUNCTION_MICMUTE "micmute" #define LED_FUNCTION_MUTE "mute" +/* Used for player LEDs as found on game controllers from e.g. Nintendo, Sony. */ +#define LED_FUNCTION_PLAYER1 "player-1" +#define LED_FUNCTION_PLAYER2 "player-2" +#define LED_FUNCTION_PLAYER3 "player-3" +#define LED_FUNCTION_PLAYER4 "player-4" +#define LED_FUNCTION_PLAYER5 "player-5" + /* Miscelleaus functions. Use functions above if you can. */ #define LED_FUNCTION_ACTIVITY "activity" #define LED_FUNCTION_ALARM "alarm" From c996cb50e262d2bf558daef3774ab7bd9c80ba8e Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:39 -0700 Subject: [PATCH 45/98] UPSTREAM: HID: playstation: expose DualSense player LEDs through LED class. The DualSense player LEDs were so far not adjustable from user-space. This patch exposes each LED individually through the LED class. Each LED uses the new 'player' function resulting in a name like: 'inputX:white:player-1' for the first LED. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 8c0ab553b072025530308f74b2c0223ec50dffe5) Change-Id: I49c699a99b0b8a7bb7980560e3ea7a12faf646aa Signed-off-by: Farid Chahla (cherry picked from commit 1c2aceb8d7ca297ec5b485163361d40a93023347) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 85 ++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index ba502522479a..074e1a2f6fca 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -56,6 +56,13 @@ struct ps_calibration_data { int sens_denom; }; +struct ps_led_info { + const char *name; + const char *color; + enum led_brightness (*brightness_get)(struct led_classdev *cdev); + int (*brightness_set)(struct led_classdev *cdev, enum led_brightness); +}; + /* Seed values for DualShock4 / DualSense CRC32 for different report types. */ #define PS_INPUT_CRC32_SEED 0xA1 #define PS_OUTPUT_CRC32_SEED 0xA2 @@ -531,6 +538,32 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu return 0; } +static int ps_led_register(struct ps_device *ps_dev, struct led_classdev *led, + const struct ps_led_info *led_info) +{ + int ret; + + led->name = devm_kasprintf(&ps_dev->hdev->dev, GFP_KERNEL, + "%s:%s:%s", ps_dev->input_dev_name, led_info->color, led_info->name); + + if (!led->name) + return -ENOMEM; + + led->brightness = 0; + led->max_brightness = 1; + led->flags = LED_CORE_SUSPENDRESUME; + led->brightness_get = led_info->brightness_get; + led->brightness_set_blocking = led_info->brightness_set; + + ret = devm_led_classdev_register(&ps_dev->hdev->dev, led); + if (ret) { + hid_err(ps_dev->hdev, "Failed to register LED %s: %d\n", led_info->name, ret); + return ret; + } + + return 0; +} + /* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. 
*/ static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev, int (*brightness_set)(struct led_classdev *, enum led_brightness)) @@ -854,6 +887,35 @@ static int dualsense_lightbar_set_brightness(struct led_classdev *cdev, return 0; } +static enum led_brightness dualsense_player_led_get_brightness(struct led_classdev *led) +{ + struct hid_device *hdev = to_hid_device(led->dev->parent); + struct dualsense *ds = hid_get_drvdata(hdev); + + return !!(ds->player_leds_state & BIT(led - ds->player_leds)); +} + +static int dualsense_player_led_set_brightness(struct led_classdev *led, enum led_brightness value) +{ + struct hid_device *hdev = to_hid_device(led->dev->parent); + struct dualsense *ds = hid_get_drvdata(hdev); + unsigned long flags; + unsigned int led_index; + + spin_lock_irqsave(&ds->base.lock, flags); + + led_index = led - ds->player_leds; + if (value == LED_OFF) + ds->player_leds_state &= ~BIT(led_index); + else + ds->player_leds_state |= BIT(led_index); + + ds->update_player_leds = true; + spin_unlock_irqrestore(&ds->base.lock, flags); + + schedule_work(&ds->output_worker); +} + static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, void *buf) { @@ -1239,7 +1301,20 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) struct dualsense *ds; struct ps_device *ps_dev; uint8_t max_output_report_size; - int ret; + int i, ret; + + static const struct ps_led_info player_leds_info[] = { + { LED_FUNCTION_PLAYER1, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER2, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER3, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER4, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER5, "white", 
dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness } + }; ds = devm_kzalloc(&hdev->dev, sizeof(*ds), GFP_KERNEL); if (!ds) @@ -1329,6 +1404,14 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) /* Set default lightbar color. */ dualsense_set_lightbar(ds, 0, 0, 128); /* blue */ + for (i = 0; i < ARRAY_SIZE(player_leds_info); i++) { + const struct ps_led_info *led_info = &player_leds_info[i]; + + ret = ps_led_register(ps_dev, &ds->player_leds[i], led_info); + if (ret < 0) + goto err; + } + ret = ps_device_set_player_id(ps_dev); if (ret) { hid_err(hdev, "Failed to assign player id for DualSense: %d\n", ret); From adce8aae671e837b5ebcf5a5a431f3f00b19dfa1 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 27 Oct 2021 10:04:10 +0200 Subject: [PATCH 46/98] UPSTREAM: HID: playstation: fix return from dualsense_player_led_set_brightness() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit brightness_set_blocking() callback expects function returning int. 
This fixes the follwoing build failure: drivers/hid/hid-playstation.c: In function ‘dualsense_player_led_set_brightness’: drivers/hid/hid-playstation.c:885:1: error: no return statement in function returning non-void [-Werror=return-type] } ^ Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 3c92cb4cb60c71b574e47108ead8b6f0470850db) Change-Id: Id16b960826a26ac22c1a14572444f9af29689ed6 Signed-off-by: Farid Chahla (cherry picked from commit 4281e236100d7ca198bca4e0e7e74410dc3fe751) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 074e1a2f6fca..ba148aa2d151 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -914,6 +914,8 @@ static int dualsense_player_led_set_brightness(struct led_classdev *led, enum le spin_unlock_irqrestore(&ds->base.lock, flags); schedule_work(&ds->output_worker); + + return 0; } static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, From 62085a0e6d90c0fda226d2e1de331ad244acd7b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Aug 2022 13:30:52 +0200 Subject: [PATCH 47/98] UPSTREAM: HID: playstation: convert to use dev_groups There is no need for a driver to individually add/create device groups, the driver core will do it automatically for you. Convert the hid-playstation driver to use the dev_groups pointer instead of manually calling the driver core to create the group and have it be cleaned up later on by the devm core. 
Cc: Roderick Colenbrander Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit b4a9af9be628e4f9d09997e0bdef30f6718e88ec) Change-Id: I516a1b0ef7f4f8545e0c1b9485b49879dd7a3136 Signed-off-by: Farid Chahla (cherry picked from commit 2096eced42faf94979f530ddb99cf0cef601af46) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index ba148aa2d151..9959472e6db2 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -692,15 +692,12 @@ static ssize_t hardware_version_show(struct device *dev, static DEVICE_ATTR_RO(hardware_version); -static struct attribute *ps_device_attributes[] = { +static struct attribute *ps_device_attrs[] = { &dev_attr_firmware_version.attr, &dev_attr_hardware_version.attr, NULL }; - -static const struct attribute_group ps_device_attribute_group = { - .attrs = ps_device_attributes, -}; +ATTRIBUTE_GROUPS(ps_device); static int dualsense_get_calibration_data(struct dualsense *ds) { @@ -1481,12 +1478,6 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id) } } - ret = devm_device_add_group(&hdev->dev, &ps_device_attribute_group); - if (ret) { - hid_err(hdev, "Failed to register sysfs nodes.\n"); - goto err_close; - } - return ret; err_close: @@ -1522,6 +1513,9 @@ static struct hid_driver ps_driver = { .probe = ps_probe, .remove = ps_remove, .raw_event = ps_raw_event, + .driver = { + .dev_groups = ps_device_groups, + }, }; static int __init ps_init(void) From e3da19b2180f079866af1aeba5db23a5e73eef58 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:11 -0700 Subject: [PATCH 48/98] UPSTREAM: HID: playstation: stop DualSense output work on 
remove. Ensure we don't schedule any new output work on removal and wait for any existing work to complete. If we don't do this e.g. rumble work can get queued during deletion and we trigger a kernel crash. Signed-off-by: Roderick Colenbrander CC: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-2-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit 182934a1e93b17f4edf71f4fcc8d19b19a6fe67a) Change-Id: I40cadfde5765cdabf45def929860258d6019bf10 Signed-off-by: Farid Chahla (cherry picked from commit 72fd6526898fc536159dc2ee72f6aaff34183547) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 41 ++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 9959472e6db2..df9761764289 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -46,6 +46,7 @@ struct ps_device { uint32_t fw_version; int (*parse_report)(struct ps_device *dev, struct hid_report *report, u8 *data, int size); + void (*remove)(struct ps_device *dev); }; /* Calibration data for playstation motion sensors. */ @@ -174,6 +175,7 @@ struct dualsense { struct led_classdev player_leds[5]; struct work_struct output_worker; + bool output_worker_initialized; void *output_report_dmabuf; uint8_t output_seq; /* Sequence number for output report. 
*/ }; @@ -299,6 +301,7 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = { {0, 0}, }; +static inline void dualsense_schedule_work(struct dualsense *ds); static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue); /* @@ -821,6 +824,7 @@ err_free: return ret; } + static int dualsense_get_firmware_info(struct dualsense *ds) { uint8_t *buf; @@ -910,7 +914,7 @@ static int dualsense_player_led_set_brightness(struct led_classdev *led, enum le ds->update_player_leds = true; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); return 0; } @@ -954,6 +958,16 @@ static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_ } } +static inline void dualsense_schedule_work(struct dualsense *ds) +{ + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); + if (ds->output_worker_initialized) + schedule_work(&ds->output_worker); + spin_unlock_irqrestore(&ds->base.lock, flags); +} + /* * Helper function to send DualSense output reports. Applies a CRC at the end of a report * for Bluetooth reports. @@ -1114,7 +1128,7 @@ static int dualsense_parse_report(struct ps_device *ps_dev, struct hid_report *r spin_unlock_irqrestore(&ps_dev->lock, flags); /* Schedule updating of microphone state at hardware level. 
*/ - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } ds->last_btn_mic_state = btn_mic_state; @@ -1229,10 +1243,22 @@ static int dualsense_play_effect(struct input_dev *dev, void *data, struct ff_ef ds->motor_right = effect->u.rumble.weak_magnitude / 256; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); return 0; } +static void dualsense_remove(struct ps_device *ps_dev) +{ + struct dualsense *ds = container_of(ps_dev, struct dualsense, base); + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); + ds->output_worker_initialized = false; + spin_unlock_irqrestore(&ds->base.lock, flags); + + cancel_work_sync(&ds->output_worker); +} + static int dualsense_reset_leds(struct dualsense *ds) { struct dualsense_output_report report; @@ -1269,7 +1295,7 @@ static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t gr ds->lightbar_blue = blue; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } static void dualsense_set_player_leds(struct dualsense *ds) @@ -1292,7 +1318,7 @@ static void dualsense_set_player_leds(struct dualsense *ds) ds->update_player_leds = true; ds->player_leds_state = player_ids[player_id]; - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } static struct ps_device *dualsense_create(struct hid_device *hdev) @@ -1331,7 +1357,9 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) ps_dev->battery_capacity = 100; /* initial value until parse_report. 
*/ ps_dev->battery_status = POWER_SUPPLY_STATUS_UNKNOWN; ps_dev->parse_report = dualsense_parse_report; + ps_dev->remove = dualsense_remove; INIT_WORK(&ds->output_worker, dualsense_output_worker); + ds->output_worker_initialized = true; hid_set_drvdata(hdev, ds); max_output_report_size = sizeof(struct dualsense_output_report_bt); @@ -1494,6 +1522,9 @@ static void ps_remove(struct hid_device *hdev) ps_devices_list_remove(dev); ps_device_release_player_id(dev); + if (dev->remove) + dev->remove(dev); + hid_hw_close(hdev); hid_hw_stop(hdev); } From 0cf6fdfb0a991c8d3bdc474434f44e86d2934fef Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:13 -0700 Subject: [PATCH 49/98] UPSTREAM: HID: playstation: support updated DualSense rumble mode. Newer DualSense firmware supports a revised classic rumble mode, which feels more similar to rumble as supported on previous PlayStation controllers. It has been made the default on PlayStation and non-PlayStation devices now (e.g. iOS and Windows). Default to this new mode when supported. Signed-off-by: Roderick Colenbrander Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-4-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit 9fecab247ed15e6145c126fc56ee1e89860741a7) Change-Id: Icd330111a4d1b1e76a04cd11c623d0982ce3d66f Signed-off-by: Farid Chahla (cherry picked from commit cf8edf192858c5997cae10fa2c028ee9e2a9db6b) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 37 ++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index df9761764289..2228f6e4ba23 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -108,6 +108,9 @@ struct ps_led_info { #define DS_STATUS_CHARGING GENMASK(7, 4) #define DS_STATUS_CHARGING_SHIFT 4 +/* Feature version from DualSense Firmware Info report. 
*/ +#define DS_FEATURE_VERSION(major, minor) ((major & 0xff) << 8 | (minor & 0xff)) + /* * Status of a DualSense touch point contact. * Contact IDs, with highest bit set are 'inactive' @@ -126,6 +129,7 @@ struct ps_led_info { #define DS_OUTPUT_VALID_FLAG1_RELEASE_LEDS BIT(3) #define DS_OUTPUT_VALID_FLAG1_PLAYER_INDICATOR_CONTROL_ENABLE BIT(4) #define DS_OUTPUT_VALID_FLAG2_LIGHTBAR_SETUP_CONTROL_ENABLE BIT(1) +#define DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2 BIT(2) #define DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE BIT(4) #define DS_OUTPUT_LIGHTBAR_SETUP_LIGHT_OUT BIT(1) @@ -143,6 +147,9 @@ struct dualsense { struct input_dev *sensors; struct input_dev *touchpad; + /* Update version is used as a feature/capability version. */ + uint16_t update_version; + /* Calibration data for accelerometer and gyroscope. */ struct ps_calibration_data accel_calib_data[3]; struct ps_calibration_data gyro_calib_data[3]; @@ -153,6 +160,7 @@ struct dualsense { uint32_t sensor_timestamp_us; /* Compatible rumble state */ + bool use_vibration_v2; bool update_rumble; uint8_t motor_left; uint8_t motor_right; @@ -844,6 +852,15 @@ static int dualsense_get_firmware_info(struct dualsense *ds) ds->base.hw_version = get_unaligned_le32(&buf[24]); ds->base.fw_version = get_unaligned_le32(&buf[28]); + /* Update version is some kind of feature version. It is distinct from + * the firmware version as there can be many different variations of a + * controller over time with the same physical shell, but with different + * PCBs and other internal changes. The update version (internal name) is + * used as a means to detect what features are available and change behavior. + * Note: the version is different between DualSense and DualSense Edge. + */ + ds->update_version = get_unaligned_le16(&buf[44]); + err_free: kfree(buf); return ret; @@ -1006,7 +1023,10 @@ static void dualsense_output_worker(struct work_struct *work) if (ds->update_rumble) { /* Select classic rumble style haptics and enable it. 
*/ common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_HAPTICS_SELECT; - common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION; + if (ds->use_vibration_v2) + common->valid_flag2 |= DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2; + else + common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION; common->motor_left = ds->motor_left; common->motor_right = ds->motor_right; ds->update_rumble = false; @@ -1380,6 +1400,21 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) return ERR_PTR(ret); } + /* Original DualSense firmware simulated classic controller rumble through + * its new haptics hardware. It felt different from classic rumble users + * were used to. Since then new firmwares were introduced to change behavior + * and make this new 'v2' behavior default on PlayStation and other platforms. + * The original DualSense requires a new enough firmware as bundled with PS5 + * software released in 2021. DualSense edge supports it out of the box. + * Both devices also support the old mode, but it is not really used. + */ + if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) { + /* Feature version 2.21 introduced new vibration method. */ + ds->use_vibration_v2 = ds->update_version >= DS_FEATURE_VERSION(2, 21); + } else if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) { + ds->use_vibration_v2 = true; + } + ret = ps_devices_list_add(ps_dev); if (ret) return ERR_PTR(ret); From a5d03f57d6c26bc7b5f23c264fd26f43b6016332 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Jan 2024 10:56:26 +0100 Subject: [PATCH 50/98] UPSTREAM: netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain commit 01acb2e8666a6529697141a6017edbf206921913 upstream. Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER event is reported, otherwise a stale reference to netdevice remains in the hook list. 
Bug: 332803585 Fixes: 60a3815da702 ("netfilter: add inet ingress support") Cc: stable@vger.kernel.org Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 70f17b48c86622217a58d5099d29242fc9adac58) Signed-off-by: Lee Jones Change-Id: I28482dca416b61dcf2e722ba0aef62d2d41a8f23 --- net/netfilter/nft_chain_filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 5b02408a920b..35aa4ea94205 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -355,9 +355,10 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_table *table; struct nft_chain *chain, *nr; + struct nft_table *table; struct nft_ctx ctx = { .net = dev_net(dev), }; @@ -369,7 +370,8 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { - if (table->family != NFPROTO_NETDEV) + if (table->family != NFPROTO_NETDEV && + table->family != NFPROTO_INET) continue; ctx.family = table->family; @@ -378,6 +380,11 @@ static int nf_tables_netdev_event(struct notifier_block *this, if (!nft_is_base_chain(chain)) continue; + basechain = nft_base_chain(chain); + if (table->family == NFPROTO_INET && + basechain->ops.hooknum != NF_INET_INGRESS) + continue; + ctx.chain = chain; nft_netdev_event(event, dev, &ctx); } From f395ea0980ef1c07569d532a84e5f6d13ebc19bf Mon Sep 17 00:00:00 2001 From: "yenchia.chen" Date: Mon, 8 Apr 2024 20:21:17 +0800 Subject: [PATCH 51/98] ANDROID: GKI: update mtktv symbol 8 function symbol(s) added 'int tty_termios_hw_change(const struct ktermios*, const struct ktermios*)' 'void usb_serial_deregister_drivers(struct 
usb_serial_driver* const*)' 'void usb_serial_generic_close(struct usb_serial_port*)' 'int usb_serial_generic_get_icount(struct tty_struct*, struct serial_icounter_struct*)' 'int usb_serial_generic_open(struct tty_struct*, struct usb_serial_port*)' 'void usb_serial_generic_throttle(struct tty_struct*)' 'void usb_serial_generic_unthrottle(struct tty_struct*)' 'int usb_serial_register_drivers(struct usb_serial_driver* const*, const char*, const struct usb_device_id*)' Bug: 333350374 Change-Id: Ie1ea35a1c6795adef7d5fd65f9fc29f855d683bb Signed-off-by: yenchia.chen --- android/abi_gki_aarch64.stg | 1159 +++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_mtktv | 37 ++ 2 files changed, 1196 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 72278aa6623b..a1c1af173972 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -3178,6 +3178,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x1b445821 } +pointer_reference { + id: 0x0c436bab + kind: POINTER + pointee_type_id: 0x1b4d4832 +} pointer_reference { id: 0x0c43d124 kind: POINTER @@ -3423,6 +3428,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x1b939067 } +pointer_reference { + id: 0x0c780bd8 + kind: POINTER + pointee_type_id: 0x1ba0c9ff +} pointer_reference { id: 0x0c786e08 kind: POINTER @@ -3518,6 +3528,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x181b0acd } +pointer_reference { + id: 0x0c975eef + kind: POINTER + pointee_type_id: 0x181d9d22 +} pointer_reference { id: 0x0c97f018 kind: POINTER @@ -3558,6 +3573,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x18d41dd9 } +pointer_reference { + id: 0x0ca5cbd8 + kind: POINTER + pointee_type_id: 0x18d7c9fc +} pointer_reference { id: 0x0ca62e19 kind: POINTER @@ -3818,6 +3838,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x19fec76b } +pointer_reference { + id: 0x0cf07e3c + kind: POINTER + pointee_type_id: 0x19811e6e +} pointer_reference { id: 0x0cf0f1be kind: 
POINTER @@ -6573,6 +6598,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x1738d1f7 } +pointer_reference { + id: 0x0f5e5d84 + kind: POINTER + pointee_type_id: 0x1739908d +} pointer_reference { id: 0x0f5f18a4 kind: POINTER @@ -10058,6 +10088,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x50573700 } +pointer_reference { + id: 0x1e870478 + kind: POINTER + pointee_type_id: 0x505cf77f +} pointer_reference { id: 0x1e881fcb kind: POINTER @@ -10758,6 +10793,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xa2172324 } +pointer_reference { + id: 0x221732c5 + kind: POINTER + pointee_type_id: 0xa21c2d88 +} pointer_reference { id: 0x22198273 kind: POINTER @@ -12703,6 +12743,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9a20634b } +pointer_reference { + id: 0x2c18712b + kind: POINTER + pointee_type_id: 0x9a212231 +} pointer_reference { id: 0x2c18d6ee kind: POINTER @@ -12758,6 +12803,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9a391cba } +pointer_reference { + id: 0x2c207521 + kind: POINTER + pointee_type_id: 0x9ac13218 +} pointer_reference { id: 0x2c209d56 kind: POINTER @@ -12808,6 +12858,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9ae52b81 } +pointer_reference { + id: 0x2c2aca57 + kind: POINTER + pointee_type_id: 0x9aebcfc2 +} pointer_reference { id: 0x2c2bf57a kind: POINTER @@ -14828,6 +14883,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x99e0f6e0 } +pointer_reference { + id: 0x2ce866aa + kind: POINTER + pointee_type_id: 0x99e17c37 +} pointer_reference { id: 0x2ce9f40c kind: POINTER @@ -19413,6 +19473,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x96ef964a } +pointer_reference { + id: 0x2f2c9c0e + kind: POINTER + pointee_type_id: 0x96f296a4 +} pointer_reference { id: 0x2f30a05a kind: POINTER @@ -19488,6 +19553,16 @@ pointer_reference { kind: POINTER pointee_type_id: 0x96b5469b } +pointer_reference { + id: 0x2f3dfe58 + kind: POINTER + pointee_type_id: 0x96b71ffd +} 
+pointer_reference { + id: 0x2f3e1dbd + kind: POINTER + pointee_type_id: 0x96b8906b +} pointer_reference { id: 0x2f3e5017 kind: POINTER @@ -20198,6 +20273,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9559376c } +pointer_reference { + id: 0x2fc68c73 + kind: POINTER + pointee_type_id: 0x955ad750 +} pointer_reference { id: 0x2fc7c937 kind: POINTER @@ -20233,6 +20313,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9502dd09 } +pointer_reference { + id: 0x2fd17240 + kind: POINTER + pointee_type_id: 0x95052f9e +} pointer_reference { id: 0x2fd46ff4 kind: POINTER @@ -20573,6 +20658,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xeb0f6de6 } +pointer_reference { + id: 0x3054f2d7 + kind: POINTER + pointee_type_id: 0xeb132dc2 +} pointer_reference { id: 0x3058262d kind: POINTER @@ -21468,6 +21558,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xe095cad8 } +pointer_reference { + id: 0x32bb7cf5 + kind: POINTER + pointee_type_id: 0xe0ad154a +} pointer_reference { id: 0x32bd639f kind: POINTER @@ -22288,6 +22383,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xfba05a49 } +pointer_reference { + id: 0x347de8b1 + kind: POINTER + pointee_type_id: 0xfbb74458 +} pointer_reference { id: 0x3481766c kind: POINTER @@ -23403,6 +23503,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xf620b983 } +pointer_reference { + id: 0x37185c4a + kind: POINTER + pointee_type_id: 0xf62197b5 +} pointer_reference { id: 0x3719a0ef kind: POINTER @@ -26408,6 +26513,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xd1740ca6 } +pointer_reference { + id: 0x3ed39c44 + kind: POINTER + pointee_type_id: 0xd10e978e +} pointer_reference { id: 0x3ed52bb5 kind: POINTER @@ -29583,6 +29693,11 @@ qualified { qualifier: CONST qualified_type_id: 0x33341885 } +qualified { + id: 0xd10e978e + qualifier: CONST + qualified_type_id: 0x347de8b1 +} qualified { id: 0xd12be7af qualifier: CONST @@ -30913,6 +31028,11 @@ qualified { qualifier: CONST qualified_type_id: 
0xdbbcb810 } +qualified { + id: 0xeb132dc2 + qualifier: CONST + qualified_type_id: 0xdc0b0182 +} qualified { id: 0xeb2226c1 qualifier: CONST @@ -32681,6 +32801,11 @@ array { number_of_elements: 16 element_type_id: 0x21069feb } +array { + id: 0x44b60e20 + number_of_elements: 16 + element_type_id: 0x221732c5 +} array { id: 0x44b8b776 number_of_elements: 2 @@ -32816,6 +32941,11 @@ array { number_of_elements: 16 element_type_id: 0x094c30e0 } +array { + id: 0x4f77c35e + number_of_elements: 16 + element_type_id: 0x0d10073d +} array { id: 0x4f935f0c number_of_elements: 32 @@ -33251,6 +33381,11 @@ array { number_of_elements: 2 element_type_id: 0x1e5f9cbf } +array { + id: 0x6bd55bd0 + number_of_elements: 2 + element_type_id: 0x1df06cce +} array { id: 0x6bdac314 number_of_elements: 2 @@ -33266,6 +33401,11 @@ array { number_of_elements: 2 element_type_id: 0x007e8ce4 } +array { + id: 0x6ce54884 + number_of_elements: 2 + element_type_id: 0x0130219f +} array { id: 0x6d099744 number_of_elements: 2 @@ -45511,6 +45651,12 @@ member { type_id: 0x2de1dbe2 offset: 64 } +member { + id: 0x9619b60e + name: "attach" + type_id: 0x2c18712b + offset: 2176 +} member { id: 0x961a0bf7 name: "attach" @@ -45678,6 +45824,13 @@ member { type_id: 0x6d7f5ff6 offset: 2016 } +member { + id: 0x57f5e518 + name: "attached" + type_id: 0x5d8155a5 + offset: 289 + bitsize: 1 +} member { id: 0x95a2ac4d name: "attached_dev" @@ -50186,6 +50339,12 @@ member { type_id: 0xc9082b19 offset: 576 } +member { + id: 0xfd552aa2 + name: "break_ctl" + type_id: 0x0c3ee516 + offset: 3264 +} member { id: 0xfd57c225 name: "break_ctl" @@ -51238,12 +51397,30 @@ member { offset: 354 bitsize: 1 } +member { + id: 0x98845503 + name: "bulk_in" + type_id: 0x4f77c35e + offset: 64 +} member { id: 0x98c50225 name: "bulk_in" type_id: 0x0e2680c2 offset: 1984 } +member { + id: 0x80e885f4 + name: "bulk_in_buffer" + type_id: 0x1df06cce + offset: 4032 +} +member { + id: 0xfd487f77 + name: "bulk_in_buffers" + type_id: 0x6bd55bd0 + offset: 4288 +} 
member { id: 0x0bed83d9 name: "bulk_in_enabled" @@ -51251,12 +51428,48 @@ member { offset: 1872 bitsize: 1 } +member { + id: 0x1926cd5d + name: "bulk_in_endpointAddress" + type_id: 0xb3e7bac9 + offset: 4224 +} +member { + id: 0x7e50fd73 + name: "bulk_in_size" + type_id: 0x6720d32f + offset: 4096 +} +member { + id: 0x7ec3e667 + name: "bulk_in_size" + type_id: 0xf435685e + offset: 1984 +} member { id: 0x223191aa name: "bulk_out" type_id: 0x0e2680c2 offset: 2048 } +member { + id: 0x2270ca1d + name: "bulk_out" + type_id: 0x4f77c35e + offset: 1088 +} +member { + id: 0x6fbe0621 + name: "bulk_out_buffer" + type_id: 0x1df06cce + offset: 4608 +} +member { + id: 0x2b1788a0 + name: "bulk_out_buffers" + type_id: 0x6bd55bd0 + offset: 4992 +} member { id: 0x1c3ca9e4 name: "bulk_out_enabled" @@ -51264,6 +51477,12 @@ member { offset: 1873 bitsize: 1 } +member { + id: 0xa1532b64 + name: "bulk_out_endpointAddress" + type_id: 0xb3e7bac9 + offset: 5312 +} member { id: 0xc866f0d4 name: "bulk_out_intended_length" @@ -51276,6 +51495,18 @@ member { type_id: 0x4585663f offset: 2880 } +member { + id: 0x6529283d + name: "bulk_out_size" + type_id: 0x6720d32f + offset: 4672 +} +member { + id: 0x65ba3b43 + name: "bulk_out_size" + type_id: 0xf435685e + offset: 2048 +} member { id: 0x460505fb name: "burst" @@ -52731,6 +52962,12 @@ member { type_id: 0x33756485 offset: 27200 } +member { + id: 0x7e1dc95a + name: "calc_num_ports" + type_id: 0x2c2aca57 + offset: 2240 +} member { id: 0x8d929646 name: "calc_sets" @@ -53777,6 +54014,12 @@ member { type_id: 0x4585663f offset: 128 } +member { + id: 0x59488b26 + name: "carrier_raised" + type_id: 0x2fd17240 + offset: 3968 +} member { id: 0x594b0e58 name: "carrier_raised" @@ -55074,6 +55317,12 @@ member { type_id: 0x0483e6f8 offset: 9664 } +member { + id: 0xd08d5830 + name: "chars_in_buffer" + type_id: 0x3bc90e1f + offset: 3328 +} member { id: 0xd08d5d4d name: "chars_in_buffer" @@ -56937,6 +57186,12 @@ member { type_id: 0x0d22c400 offset: 128 } +member { + 
id: 0xcd7f6f7d + name: "close" + type_id: 0x0c975eef + offset: 2816 +} member { id: 0xcd7fa645 name: "close" @@ -67666,6 +67921,12 @@ member { type_id: 0x23230326 offset: 320 } +member { + id: 0xce1ac7ea + name: "dev" + type_id: 0x23230326 + offset: 6272 +} member { id: 0xce1ac83c name: "dev" @@ -67798,6 +68059,11 @@ member { type_id: 0x0d7ce7cc offset: 64 } +member { + id: 0xce349ead + name: "dev" + type_id: 0x0d7ce7cc +} member { id: 0xce3785c0 name: "dev" @@ -70140,6 +70406,12 @@ member { type_id: 0x18bd6530 offset: 5568 } +member { + id: 0xdc0f3007 + name: "disc_mutex" + type_id: 0xa7c362b0 + offset: 1472 +} member { id: 0x8c3076ff name: "discard" @@ -70188,6 +70460,12 @@ member { type_id: 0x0f3dfb90 offset: 1408 } +member { + id: 0x8da4e999 + name: "disconnect" + type_id: 0x0f5e5d84 + offset: 2304 +} member { id: 0x8da522b5 name: "disconnect" @@ -70267,6 +70545,13 @@ member { type_id: 0x1f3c8679 offset: 1280 } +member { + id: 0xd1a76d30 + name: "disconnected" + type_id: 0x5d8155a5 + offset: 288 + bitsize: 1 +} member { id: 0x77b2de2b name: "discov_interleaved_timeout" @@ -72260,6 +72545,12 @@ member { type_id: 0x6e3b7d7f offset: 50752 } +member { + id: 0xee864cc4 + name: "driver_list" + type_id: 0xd3c80119 + offset: 128 +} member { id: 0xb3c8d0ce name: "driver_max_VFs" @@ -73056,6 +73347,12 @@ member { type_id: 0x0d14f575 offset: 512 } +member { + id: 0xc9730947 + name: "dtr_rts" + type_id: 0x0cf07e3c + offset: 3904 +} member { id: 0x8c9f51a9 name: "dual_link" @@ -73562,6 +73859,12 @@ member { type_id: 0x3077bd6f offset: 2368 } +member { + id: 0x1b86a2bd + name: "dynids" + type_id: 0x91a9600d + offset: 1728 +} member { id: 0x1b86a7a5 name: "dynids" @@ -82452,6 +82755,12 @@ member { type_id: 0x33756485 offset: 4032 } +member { + id: 0x2d5bf7e0 + name: "flags" + type_id: 0x33756485 + offset: 5760 +} member { id: 0x2d5bf7f4 name: "flags" @@ -87533,6 +87842,12 @@ member { name: "get_hwirq" type_id: 0x30934160 } +member { + id: 0x34f4a74e + name: "get_icount" + 
type_id: 0x2f1a6bce + offset: 3840 +} member { id: 0x34f4a83a name: "get_icount" @@ -88200,6 +88515,12 @@ member { type_id: 0x2f054704 offset: 1856 } +member { + id: 0x956eb42c + name: "get_serial" + type_id: 0x0c436bab + offset: 3072 +} member { id: 0x545ea760 name: "get_sg_table" @@ -94227,6 +94548,12 @@ member { type_id: 0x02900546 offset: 192 } +member { + id: 0x083df5c3 + name: "icount" + type_id: 0x63b8b563 + offset: 5344 +} member { id: 0x0843960a name: "icount" @@ -95217,6 +95544,12 @@ member { type_id: 0x3b2ca4e8 offset: 64 } +member { + id: 0xc4fbc795 + name: "id_table" + type_id: 0x38040a6c + offset: 64 +} member { id: 0xc4fbca06 name: "id_table" @@ -97844,6 +98177,12 @@ member { type_id: 0x3a47ea7a offset: 128 } +member { + id: 0xe7104099 + name: "init_termios" + type_id: 0x0c59c5c5 + offset: 4032 +} member { id: 0xe7c0151f name: "init_termios" @@ -98682,6 +99021,12 @@ member { type_id: 0x442bf459 offset: 768 } +member { + id: 0x1e592a48 + name: "interface" + type_id: 0x21069feb + offset: 128 +} member { id: 0x1e64a125 name: "interface" @@ -98859,6 +99204,60 @@ member { type_id: 0x6d7f5ff6 offset: 72 } +member { + id: 0xb25f3460 + name: "interrupt_in" + type_id: 0x4f77c35e + offset: 2112 +} +member { + id: 0xff6c376e + name: "interrupt_in_buffer" + type_id: 0x1df06cce + offset: 3584 +} +member { + id: 0x85b71e23 + name: "interrupt_in_endpointAddress" + type_id: 0xb3e7bac9 + offset: 3712 +} +member { + id: 0xa213a93e + name: "interrupt_in_urb" + type_id: 0x0130219f + offset: 3648 +} +member { + id: 0x88026c22 + name: "interrupt_out" + type_id: 0x4f77c35e + offset: 3136 +} +member { + id: 0xf35c5c33 + name: "interrupt_out_buffer" + type_id: 0x1df06cce + offset: 3776 +} +member { + id: 0xda583590 + name: "interrupt_out_endpointAddress" + type_id: 0xb3e7bac9 + offset: 3968 +} +member { + id: 0xa83090b1 + name: "interrupt_out_size" + type_id: 0x6720d32f + offset: 3840 +} +member { + id: 0x5e3b6037 + name: "interrupt_out_urb" + type_id: 0x0130219f + offset: 
3904 +} member { id: 0x05278e35 name: "interrupt_pin" @@ -99573,6 +99972,12 @@ member { type_id: 0x2e1b56db offset: 384 } +member { + id: 0x4d4b01d0 + name: "ioctl" + type_id: 0x2f595b5a + offset: 3008 +} member { id: 0x4d4b0f76 name: "ioctl" @@ -103539,6 +103944,12 @@ member { name: "kref" type_id: 0x6f1daf87 } +member { + id: 0x02ce5b75 + name: "kref" + type_id: 0x6f1daf87 + offset: 1408 +} member { id: 0x02ce5d67 name: "kref" @@ -107637,6 +108048,12 @@ member { type_id: 0xf313e71a offset: 896 } +member { + id: 0x2d1fe6cf + name: "lock" + type_id: 0xf313e71a + offset: 3456 +} member { id: 0x2d1fe798 name: "lock" @@ -115766,6 +116183,12 @@ member { type_id: 0xc9082b19 offset: 32 } +member { + id: 0xc8ee0756 + name: "minor" + type_id: 0xc9082b19 + offset: 3488 +} member { id: 0xc8ee0a2c name: "minor" @@ -115814,6 +116237,13 @@ member { type_id: 0x6720d32f offset: 64 } +member { + id: 0x4d99c9ee + name: "minors_reserved" + type_id: 0x5d8155a5 + offset: 290 + bitsize: 1 +} member { id: 0x096ecb61 name: "minutes" @@ -125598,6 +126028,41 @@ member { type_id: 0x4585663f offset: 5376 } +member { + id: 0x4cfd40f2 + name: "num_bulk_in" + type_id: 0x5d8155a5 + offset: 1928 +} +member { + id: 0x4cfd4941 + name: "num_bulk_in" + type_id: 0x5d8155a5 + offset: 328 +} +member { + id: 0x4cfd4a18 + name: "num_bulk_in" + type_id: 0x5d8155a5 +} +member { + id: 0xd78f59f9 + name: "num_bulk_out" + type_id: 0x5d8155a5 + offset: 1936 +} +member { + id: 0xd78f5a1c + name: "num_bulk_out" + type_id: 0x5d8155a5 + offset: 336 +} +member { + id: 0xd78f5f05 + name: "num_bulk_out" + type_id: 0x5d8155a5 + offset: 8 +} member { id: 0x32b760e4 name: "num_bus_formats" @@ -126049,6 +126514,42 @@ member { type_id: 0x6720d32f offset: 2048 } +member { + id: 0x1968f7ca + name: "num_interrupt_in" + type_id: 0x5d8155a5 + offset: 312 +} +member { + id: 0x1968f921 + name: "num_interrupt_in" + type_id: 0x5d8155a5 + offset: 16 +} +member { + id: 0x1968fceb + name: "num_interrupt_in" + type_id: 0x5d8155a5 + 
offset: 1944 +} +member { + id: 0xc578d426 + name: "num_interrupt_out" + type_id: 0x5d8155a5 + offset: 24 +} +member { + id: 0xc578d580 + name: "num_interrupt_out" + type_id: 0x5d8155a5 + offset: 320 +} +member { + id: 0xc578dab3 + name: "num_interrupt_out" + type_id: 0x5d8155a5 + offset: 1952 +} member { id: 0xdf32959c name: "num_ioctls" @@ -126346,12 +126847,30 @@ member { type_id: 0x4585663f offset: 58624 } +member { + id: 0x176ff071 + name: "num_port_pointers" + type_id: 0x5d8155a5 + offset: 304 +} member { id: 0x0f014be3 name: "num_ports" type_id: 0x4585663f offset: 64 } +member { + id: 0x0f194513 + name: "num_ports" + type_id: 0x5d8155a5 + offset: 296 +} +member { + id: 0x0f194aef + name: "num_ports" + type_id: 0x5d8155a5 + offset: 1920 +} member { id: 0x0f23ee6e name: "num_ports" @@ -128776,6 +129295,12 @@ member { type_id: 0x2f1fe96a offset: 128 } +member { + id: 0xad9bbf06 + name: "open" + type_id: 0x2f3dfe58 + offset: 2752 +} member { id: 0xadb0bada name: "open" @@ -130539,6 +131064,12 @@ member { type_id: 0xe62ebf07 offset: 256 } +member { + id: 0x455a2c83 + name: "overrun" + type_id: 0xe62ebf07 + offset: 256 +} member { id: 0x455a2f0c name: "overrun" @@ -132605,6 +133136,12 @@ member { type_id: 0xe62ebf07 offset: 256 } +member { + id: 0xd6e7fef7 + name: "parity" + type_id: 0xe62ebf07 + offset: 224 +} member { id: 0xc70bed2a name: "park" @@ -137396,6 +137933,12 @@ member { name: "port" type_id: 0x4201a01e } +member { + id: 0x48b91cec + name: "port" + type_id: 0x4201a01e + offset: 64 +} member { id: 0x48be982a name: "port" @@ -137407,6 +137950,12 @@ member { name: "port" type_id: 0x4585663f } +member { + id: 0x48bfae31 + name: "port" + type_id: 0x44b60e20 + offset: 384 +} member { id: 0x48cc7707 name: "port" @@ -137622,6 +138171,12 @@ member { type_id: 0x295c7202 offset: 3520 } +member { + id: 0xdc31667a + name: "port_number" + type_id: 0x295c7202 + offset: 3520 +} member { id: 0xdcd13b2b name: "port_number" @@ -137651,12 +138206,24 @@ member { type_id: 
0x0baa70a7 offset: 8224 } +member { + id: 0x66c5c2ea + name: "port_probe" + type_id: 0x2fd17240 + offset: 2432 +} member { id: 0x687fd28c name: "port_remote_wakeup" type_id: 0xc9082b19 offset: 192 } +member { + id: 0xe4114db1 + name: "port_remove" + type_id: 0x0c975eef + offset: 2496 +} member { id: 0xa22ff2af name: "port_split" @@ -139111,6 +139678,12 @@ member { type_id: 0x0f7ac5c1 offset: 128 } +member { + id: 0x934f43bd + name: "prepare_write_buffer" + type_id: 0x2fc68c73 + offset: 4416 +} member { id: 0x9ef237da name: "prepare_writeback_job" @@ -140111,6 +140684,12 @@ member { type_id: 0x18bd6530 offset: 7296 } +member { + id: 0x91796dae + name: "private" + type_id: 0x18bd6530 + offset: 1856 +} member { id: 0x91796e7a name: "private" @@ -140516,6 +141095,12 @@ member { type_id: 0x2c27cb1a offset: 64 } +member { + id: 0xd77a2d00 + name: "probe" + type_id: 0x2c207521 + offset: 2112 +} member { id: 0xd77a3f94 name: "probe" @@ -140947,6 +141532,12 @@ member { type_id: 0x578f9c2b offset: 128 } +member { + id: 0x320901bb + name: "process_read_urb" + type_id: 0x0ea52fda + offset: 4352 +} member { id: 0x3cce5fd6 name: "process_todo" @@ -145695,6 +146286,12 @@ member { type_id: 0x4585663f offset: 224 } +member { + id: 0xda4d0f3d + name: "read_bulk_callback" + type_id: 0x0ea52fda + offset: 4224 +} member { id: 0xc9397873 name: "read_bytes" @@ -145821,6 +146418,12 @@ member { type_id: 0x2e7062fc offset: 448 } +member { + id: 0xe2acc05f + name: "read_int_callback" + type_id: 0x0ea52fda + offset: 4096 +} member { id: 0xeb51ccd7 name: "read_iter" @@ -146021,6 +146624,24 @@ member { type_id: 0x1dcc0874 offset: 1088 } +member { + id: 0x20931d2e + name: "read_urb" + type_id: 0x0130219f + offset: 4160 +} +member { + id: 0xc6c744ad + name: "read_urbs" + type_id: 0x6ce54884 + offset: 4416 +} +member { + id: 0x599921b1 + name: "read_urbs_free" + type_id: 0x33756485 + offset: 4544 +} member { id: 0x1cafcc11 name: "read_w" @@ -148207,6 +148828,12 @@ member { type_id: 0x0f626ee5 
offset: 1152 } +member { + id: 0xae97a5b3 + name: "release" + type_id: 0x0f5e5d84 + offset: 2368 +} member { id: 0xae97f307 name: "release" @@ -150801,6 +151428,12 @@ member { type_id: 0x2fe06892 offset: 384 } +member { + id: 0xa792c035 + name: "reset_resume" + type_id: 0x2c18712b + offset: 2688 +} member { id: 0xa793f54a name: "reset_resume" @@ -151410,6 +152043,12 @@ member { type_id: 0x2cee6908 offset: 448 } +member { + id: 0xcab3f147 + name: "resume" + type_id: 0x2c18712b + offset: 2624 +} member { id: 0xa4d8edf6 name: "resume_done" @@ -153933,6 +154572,12 @@ member { type_id: 0xe62ebf07 offset: 128 } +member { + id: 0x6c1664a5 + name: "rx" + type_id: 0xe62ebf07 + offset: 160 +} member { id: 0x6c511725 name: "rx" @@ -158526,6 +159171,11 @@ member { type_id: 0x0483e6f8 offset: 11392 } +member { + id: 0xa79bc81d + name: "serial" + type_id: 0x1e870478 +} member { id: 0xa7a9403c name: "serial" @@ -159734,6 +160384,12 @@ member { type_id: 0x2f054704 offset: 1920 } +member { + id: 0xd9943837 + name: "set_serial" + type_id: 0x2f054704 + offset: 3136 +} member { id: 0xdada940c name: "set_signals" @@ -159853,6 +160509,12 @@ member { type_id: 0x2de347ce offset: 384 } +member { + id: 0x7e9ca3e9 + name: "set_termios" + type_id: 0x0c780bd8 + offset: 3200 +} member { id: 0x7e9cb9f7 name: "set_termios" @@ -161511,6 +162173,12 @@ member { type_id: 0xd3c80119 offset: 13184 } +member { + id: 0xeec37d27 + name: "sibling" + type_id: 0x21069feb + offset: 192 +} member { id: 0xeed6076d name: "sibling" @@ -169671,6 +170339,12 @@ member { type_id: 0x2ce67932 offset: 576 } +member { + id: 0xf3963ffd + name: "suspend" + type_id: 0x2ce866aa + offset: 2560 +} member { id: 0xf396c9d5 name: "suspend" @@ -169781,6 +170455,12 @@ member { offset: 2498 bitsize: 1 } +member { + id: 0xdcd36a92 + name: "suspend_count" + type_id: 0x4585663f + offset: 256 +} member { id: 0xdce23cc4 name: "suspend_count" @@ -171634,6 +172314,12 @@ member { type_id: 0x33756485 offset: 2752 } +member { + id: 0x1e9bc448 
+ name: "sysrq" + type_id: 0x33756485 + offset: 6208 +} member { id: 0x4d8ad507 name: "sysrq_ch" @@ -173806,6 +174492,12 @@ member { type_id: 0xa7c362b0 offset: 9280 } +member { + id: 0xca51229f + name: "throttle" + type_id: 0x0c59c5c5 + offset: 3520 +} member { id: 0xca512364 name: "throttle" @@ -174777,6 +175469,18 @@ member { type_id: 0x2f1fe96a offset: 1600 } +member { + id: 0x7363590a + name: "tiocmget" + type_id: 0x2f1fe96a + offset: 3648 +} +member { + id: 0x6e7a79e5 + name: "tiocmiwait" + type_id: 0x2f2c9c0e + offset: 3776 +} member { id: 0x7e407b75 name: "tiocmset" @@ -174789,6 +175493,12 @@ member { type_id: 0x2f5e345a offset: 1664 } +member { + id: 0x7e4156be + name: "tiocmset" + type_id: 0x2f5e345a + offset: 3712 +} member { id: 0x073cf00c name: "tipc_ptr" @@ -177594,6 +178304,12 @@ member { type_id: 0xe62ebf07 offset: 160 } +member { + id: 0x653ea6fe + name: "tx" + type_id: 0xe62ebf07 + offset: 128 +} member { id: 0x6548ea05 name: "tx" @@ -177668,6 +178384,12 @@ member { type_id: 0x391f15ea offset: 128 } +member { + id: 0xb51b57a4 + name: "tx_bytes" + type_id: 0x6720d32f + offset: 5696 +} member { id: 0xb54f06ad name: "tx_bytes" @@ -177831,6 +178553,12 @@ member { type_id: 0xb02b353a offset: 3904 } +member { + id: 0x14573c12 + name: "tx_empty" + type_id: 0x37185c4a + offset: 3456 +} member { id: 0x1459039b name: "tx_empty" @@ -178801,6 +179529,12 @@ member { type_id: 0x37ce2c2c offset: 544 } +member { + id: 0x5c62ac77 + name: "type" + type_id: 0x347de8b1 + offset: 64 +} member { id: 0x5c62c9fc name: "type" @@ -181188,6 +181922,12 @@ member { type_id: 0x0c59c5c5 offset: 1024 } +member { + id: 0x80cd11b3 + name: "unthrottle" + type_id: 0x0c59c5c5 + offset: 3584 +} member { id: 0x80cff2d9 name: "unthrottle" @@ -182067,6 +182807,12 @@ member { type_id: 0x0258f96e offset: 7680 } +member { + id: 0xfc09f0db + name: "usb_driver" + type_id: 0x3c9a9fb2 + offset: 1664 +} member { id: 0x075c8af5 name: "usb_id" @@ -186849,6 +187595,12 @@ member { type_id: 
0x0c3ee516 offset: 1472 } +member { + id: 0x691df5b8 + name: "wait_until_sent" + type_id: 0x0ca5cbd8 + offset: 3392 +} member { id: 0x691e4bbe name: "wait_until_sent" @@ -188139,6 +188891,12 @@ member { type_id: 0x1f3c8679 offset: 832 } +member { + id: 0xd6e6643f + name: "work" + type_id: 0x1f3c8679 + offset: 5824 +} member { id: 0xd6e66625 name: "work" @@ -188790,6 +189548,12 @@ member { type_id: 0x2f830764 offset: 256 } +member { + id: 0x342fe464 + name: "write" + type_id: 0x2f3e1dbd + offset: 2880 +} member { id: 0x342ff252 name: "write" @@ -188856,6 +189620,12 @@ member { type_id: 0x1df06cce offset: 5952 } +member { + id: 0x68c4d9d3 + name: "write_bulk_callback" + type_id: 0x0ea52fda + offset: 4288 +} member { id: 0xfeec54df name: "write_busy" @@ -188933,6 +189703,12 @@ member { type_id: 0x2c45ef00 offset: 704 } +member { + id: 0xa2006573 + name: "write_fifo" + type_id: 0x18745118 + offset: 4800 +} member { id: 0x2ec77a1f name: "write_file_info" @@ -188999,6 +189775,12 @@ member { type_id: 0x2c685816 offset: 256 } +member { + id: 0x84573c9c + name: "write_int_callback" + type_id: 0x0ea52fda + offset: 4160 +} member { id: 0xa854c899 name: "write_iter" @@ -189097,6 +189879,12 @@ member { type_id: 0x0f0e8ef4 offset: 1408 } +member { + id: 0xb08e3438 + name: "write_room" + type_id: 0x3bc90e1f + offset: 2944 +} member { id: 0xb08e3b1a name: "write_room" @@ -189145,6 +189933,24 @@ member { type_id: 0x2f53e65e offset: 1472 } +member { + id: 0xd4bce130 + name: "write_urb" + type_id: 0x0130219f + offset: 4736 +} +member { + id: 0x47ac84ab + name: "write_urbs" + type_id: 0x6ce54884 + offset: 5120 +} +member { + id: 0x34019984 + name: "write_urbs_free" + type_id: 0x33756485 + offset: 5248 +} member { id: 0x10a6114d name: "write_w" @@ -197737,6 +198543,25 @@ struct_union { member_id: 0x9d3e49d9 } } +struct_union { + id: 0x63b8b563 + kind: STRUCT + name: "async_icount" + definition { + bytesize: 44 + member_id: 0x273f08da + member_id: 0x3992ebe5 + member_id: 0xdd584c28 + 
member_id: 0x510e9896 + member_id: 0x653ea6fe + member_id: 0x6c1664a5 + member_id: 0x1691771b + member_id: 0xd6e7fef7 + member_id: 0x455a2c83 + member_id: 0x56c792be + member_id: 0x408101ca + } +} struct_union { id: 0x3cd7d077 kind: STRUCT @@ -241481,6 +242306,147 @@ struct_union { member_id: 0x0de577f0 } } +struct_union { + id: 0x505cf77f + kind: STRUCT + name: "usb_serial" + definition { + bytesize: 240 + member_id: 0xce349ead + member_id: 0x5c62ac77 + member_id: 0x1e592a48 + member_id: 0xeec37d27 + member_id: 0xdcd36a92 + member_id: 0xd1a76d30 + member_id: 0x57f5e518 + member_id: 0x4d99c9ee + member_id: 0x0f194513 + member_id: 0x176ff071 + member_id: 0x1968f7ca + member_id: 0xc578d580 + member_id: 0x4cfd4941 + member_id: 0xd78f5a1c + member_id: 0x48bfae31 + member_id: 0x02ce5b75 + member_id: 0xdc0f3007 + member_id: 0x91796dae + } +} +struct_union { + id: 0xfbb74458 + kind: STRUCT + name: "usb_serial_driver" + definition { + bytesize: 560 + member_id: 0x3144f518 + member_id: 0xc4fbc795 + member_id: 0xee864cc4 + member_id: 0xd4ad4cc3 + member_id: 0xfc09f0db + member_id: 0x1b86a2bd + member_id: 0x0f194aef + member_id: 0x4cfd40f2 + member_id: 0xd78f59f9 + member_id: 0x1968fceb + member_id: 0xc578dab3 + member_id: 0x7ec3e667 + member_id: 0x65ba3b43 + member_id: 0xd77a2d00 + member_id: 0x9619b60e + member_id: 0x7e1dc95a + member_id: 0x8da4e999 + member_id: 0xae97a5b3 + member_id: 0x66c5c2ea + member_id: 0xe4114db1 + member_id: 0xf3963ffd + member_id: 0xcab3f147 + member_id: 0xa792c035 + member_id: 0xad9bbf06 + member_id: 0xcd7f6f7d + member_id: 0x342fe464 + member_id: 0xb08e3438 + member_id: 0x4d4b01d0 + member_id: 0x956eb42c + member_id: 0xd9943837 + member_id: 0x7e9ca3e9 + member_id: 0xfd552aa2 + member_id: 0xd08d5830 + member_id: 0x691df5b8 + member_id: 0x14573c12 + member_id: 0xca51229f + member_id: 0x80cd11b3 + member_id: 0x7363590a + member_id: 0x7e4156be + member_id: 0x6e7a79e5 + member_id: 0x34f4a74e + member_id: 0xc9730947 + member_id: 0x59488b26 + member_id: 
0xe7104099 + member_id: 0xe2acc05f + member_id: 0x84573c9c + member_id: 0xda4d0f3d + member_id: 0x68c4d9d3 + member_id: 0x320901bb + member_id: 0x934f43bd + } +} +struct_union { + id: 0xe0ad154a + kind: STRUCT + name: "usb_serial_endpoints" + definition { + bytesize: 520 + member_id: 0x4cfd4a18 + member_id: 0xd78f5f05 + member_id: 0x1968f921 + member_id: 0xc578d426 + member_id: 0x98845503 + member_id: 0x2270ca1d + member_id: 0xb25f3460 + member_id: 0x88026c22 + } +} +struct_union { + id: 0xa21c2d88 + kind: STRUCT + name: "usb_serial_port" + definition { + bytesize: 1696 + member_id: 0xa79bc81d + member_id: 0x48b91cec + member_id: 0x2d1fe6cf + member_id: 0xc8ee0756 + member_id: 0xdc31667a + member_id: 0xff6c376e + member_id: 0xa213a93e + member_id: 0x85b71e23 + member_id: 0xf35c5c33 + member_id: 0xa83090b1 + member_id: 0x5e3b6037 + member_id: 0xda583590 + member_id: 0x80e885f4 + member_id: 0x7e50fd73 + member_id: 0x20931d2e + member_id: 0x1926cd5d + member_id: 0xfd487f77 + member_id: 0xc6c744ad + member_id: 0x599921b1 + member_id: 0x6fbe0621 + member_id: 0x6529283d + member_id: 0xd4bce130 + member_id: 0xa2006573 + member_id: 0x2b1788a0 + member_id: 0x47ac84ab + member_id: 0x34019984 + member_id: 0xa1532b64 + member_id: 0x083df5c3 + member_id: 0xb51b57a4 + member_id: 0x2d5bf7e0 + member_id: 0xd6e6643f + member_id: 0x1e9bc448 + member_id: 0xce1ac7ea + } +} struct_union { id: 0xabc64e21 kind: STRUCT @@ -267011,6 +267977,11 @@ function { return_type_id: 0x48b5725f parameter_id: 0x1e820193 } +function { + id: 0x1739908d + return_type_id: 0x48b5725f + parameter_id: 0x1e870478 +} function { id: 0x173c840d return_type_id: 0x48b5725f @@ -267472,6 +268443,11 @@ function { return_type_id: 0x48b5725f parameter_id: 0x2208f89a } +function { + id: 0x181d9d22 + return_type_id: 0x48b5725f + parameter_id: 0x221732c5 +} function { id: 0x181ece84 return_type_id: 0x48b5725f @@ -267800,6 +268776,12 @@ function { return_type_id: 0x48b5725f parameter_id: 0x2131312a } +function { + id: 
0x18d7c9fc + return_type_id: 0x48b5725f + parameter_id: 0x2efe8065 + parameter_id: 0xfc0e1dbd +} function { id: 0x18d85efa return_type_id: 0x48b5725f @@ -268083,6 +269065,12 @@ function { parameter_id: 0x26a80a21 parameter_id: 0x13f8b706 } +function { + id: 0x19811e6e + return_type_id: 0x48b5725f + parameter_id: 0x221732c5 + parameter_id: 0x6720d32f +} function { id: 0x19832066 return_type_id: 0x48b5725f @@ -268849,6 +269837,12 @@ function { parameter_id: 0x2cba2cd4 parameter_id: 0x3eacd4c8 } +function { + id: 0x1b4d4832 + return_type_id: 0x48b5725f + parameter_id: 0x2efe8065 + parameter_id: 0x1aae6e0f +} function { id: 0x1b4fa20e return_type_id: 0x48b5725f @@ -269020,6 +270014,13 @@ function { parameter_id: 0x18bd6530 parameter_id: 0xe02e14d6 } +function { + id: 0x1ba0c9ff + return_type_id: 0x48b5725f + parameter_id: 0x2efe8065 + parameter_id: 0x221732c5 + parameter_id: 0x3d92f9c7 +} function { id: 0x1ba0fc33 return_type_id: 0x48b5725f @@ -271629,6 +272630,11 @@ function { parameter_id: 0x3ee88c45 parameter_id: 0x0258f96e } +function { + id: 0x1f2cb682 + return_type_id: 0x48b5725f + parameter_id: 0x3ed39c44 +} function { id: 0x1f3094b2 return_type_id: 0x48b5725f @@ -279205,6 +280211,12 @@ function { parameter_id: 0x315b7e01 parameter_id: 0x21003da7 } +function { + id: 0x91548c51 + return_type_id: 0x6720d32f + parameter_id: 0x3054f2d7 + parameter_id: 0x3054f2d7 +} function { id: 0x9154ff7c return_type_id: 0x6720d32f @@ -281315,6 +282327,13 @@ function { parameter_id: 0x3eed77c0 parameter_id: 0x0258f96e } +function { + id: 0x92c247e8 + return_type_id: 0x6720d32f + parameter_id: 0x3ed39c44 + parameter_id: 0x3e10b518 + parameter_id: 0x38040a6c +} function { id: 0x92c286e9 return_type_id: 0x6720d32f @@ -283450,6 +284469,11 @@ function { return_type_id: 0x6720d32f parameter_id: 0x2208f89a } +function { + id: 0x95052f9e + return_type_id: 0x6720d32f + parameter_id: 0x221732c5 +} function { id: 0x950581be return_type_id: 0x6720d32f @@ -283682,6 +284706,13 @@ function { 
parameter_id: 0x6720d32f parameter_id: 0x6d7f5ff6 } +function { + id: 0x955ad750 + return_type_id: 0x6720d32f + parameter_id: 0x221732c5 + parameter_id: 0x18bd6530 + parameter_id: 0xf435685e +} function { id: 0x955f7e5a return_type_id: 0x6720d32f @@ -284531,6 +285562,20 @@ function { return_type_id: 0x6720d32f parameter_id: 0x2cd31328 } +function { + id: 0x96b71ffd + return_type_id: 0x6720d32f + parameter_id: 0x2efe8065 + parameter_id: 0x221732c5 +} +function { + id: 0x96b8906b + return_type_id: 0x6720d32f + parameter_id: 0x2efe8065 + parameter_id: 0x221732c5 + parameter_id: 0x384c5795 + parameter_id: 0x6720d32f +} function { id: 0x96b9a6c2 return_type_id: 0x6720d32f @@ -284797,6 +285842,12 @@ function { parameter_id: 0x054f691a parameter_id: 0x3fe8ca70 } +function { + id: 0x96f296a4 + return_type_id: 0x6720d32f + parameter_id: 0x2efe8065 + parameter_id: 0x33756485 +} function { id: 0x96fd9031 return_type_id: 0x6720d32f @@ -287414,6 +288465,12 @@ function { parameter_id: 0x92233392 parameter_id: 0x2e0f9112 } +function { + id: 0x99e17c37 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 + parameter_id: 0xf017819f +} function { id: 0x99e350e4 return_type_id: 0x6720d32f @@ -287700,6 +288757,11 @@ function { return_type_id: 0x6720d32f parameter_id: 0x1e820193 } +function { + id: 0x9a212231 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 +} function { id: 0x9a23bd25 return_type_id: 0xcc33625b @@ -288436,6 +289498,12 @@ function { parameter_id: 0x1c2f6323 parameter_id: 0x128c0fdb } +function { + id: 0x9ac13218 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 + parameter_id: 0x38040a6c +} function { id: 0x9ac293c4 return_type_id: 0x6720d32f @@ -288574,6 +289642,12 @@ function { parameter_id: 0x1a8d1bcb parameter_id: 0x716d7970 } +function { + id: 0x9aebcfc2 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 + parameter_id: 0x32bb7cf5 +} function { id: 0x9aef3374 return_type_id: 0x6720d32f @@ -301341,6 +302415,11 @@ function { parameter_id: 
0x38fdd541 parameter_id: 0x3c01aef6 } +function { + id: 0xf62197b5 + return_type_id: 0x6d7f5ff6 + parameter_id: 0x221732c5 +} function { id: 0xf6266522 return_type_id: 0x6d7f5ff6 @@ -355982,6 +357061,15 @@ elf_symbol { type_id: 0x1d825cc4 full_name: "tty_termios_encode_baud_rate" } +elf_symbol { + id: 0xacb09a3e + name: "tty_termios_hw_change" + is_defined: true + symbol_type: FUNCTION + crc: 0x6c257ac0 + type_id: 0x91548c51 + full_name: "tty_termios_hw_change" +} elf_symbol { id: 0x6eee841a name: "tty_unregister_device" @@ -358810,6 +359898,69 @@ elf_symbol { type_id: 0x1d8d80fc full_name: "usb_scuttle_anchored_urbs" } +elf_symbol { + id: 0x5fde6ab0 + name: "usb_serial_deregister_drivers" + is_defined: true + symbol_type: FUNCTION + crc: 0x6cfec83b + type_id: 0x1f2cb682 + full_name: "usb_serial_deregister_drivers" +} +elf_symbol { + id: 0xcb415220 + name: "usb_serial_generic_close" + is_defined: true + symbol_type: FUNCTION + crc: 0xf57aed21 + type_id: 0x181d9d22 + full_name: "usb_serial_generic_close" +} +elf_symbol { + id: 0x256f289d + name: "usb_serial_generic_get_icount" + is_defined: true + symbol_type: FUNCTION + crc: 0x7c014a1a + type_id: 0x962949a5 + full_name: "usb_serial_generic_get_icount" +} +elf_symbol { + id: 0xcc99e836 + name: "usb_serial_generic_open" + is_defined: true + symbol_type: FUNCTION + crc: 0x509aaf7d + type_id: 0x96b71ffd + full_name: "usb_serial_generic_open" +} +elf_symbol { + id: 0xbeec161b + name: "usb_serial_generic_throttle" + is_defined: true + symbol_type: FUNCTION + crc: 0x339df3ba + type_id: 0x1b27f18a + full_name: "usb_serial_generic_throttle" +} +elf_symbol { + id: 0x1e85ca64 + name: "usb_serial_generic_unthrottle" + is_defined: true + symbol_type: FUNCTION + crc: 0xf1a85827 + type_id: 0x1b27f18a + full_name: "usb_serial_generic_unthrottle" +} +elf_symbol { + id: 0xbc49d007 + name: "usb_serial_register_drivers" + is_defined: true + symbol_type: FUNCTION + crc: 0x2b562ef7 + type_id: 0x92c247e8 + full_name: 
"usb_serial_register_drivers" +} elf_symbol { id: 0x34af8a35 name: "usb_set_device_state" @@ -368433,6 +369584,7 @@ interface { symbol_id: 0x40ef0583 symbol_id: 0x66974d1b symbol_id: 0x4cc18d95 + symbol_id: 0xacb09a3e symbol_id: 0x6eee841a symbol_id: 0x0c2de3ab symbol_id: 0xaf7b86f3 @@ -368747,6 +369899,13 @@ interface { symbol_id: 0x2db97071 symbol_id: 0x88509066 symbol_id: 0x3f64aa24 + symbol_id: 0x5fde6ab0 + symbol_id: 0xcb415220 + symbol_id: 0x256f289d + symbol_id: 0xcc99e836 + symbol_id: 0xbeec161b + symbol_id: 0x1e85ca64 + symbol_id: 0xbc49d007 symbol_id: 0x34af8a35 symbol_id: 0x3cc50b4b symbol_id: 0x3e6b6dd2 diff --git a/android/abi_gki_aarch64_mtktv b/android/abi_gki_aarch64_mtktv index 20425705a20c..3b924f3c08cc 100644 --- a/android/abi_gki_aarch64_mtktv +++ b/android/abi_gki_aarch64_mtktv @@ -192,6 +192,7 @@ copy_page _copy_to_iter cpu_bit_bitmap + cpufreq_boost_enabled cpufreq_cpu_get_raw cpufreq_dbs_governor_exit cpufreq_dbs_governor_init @@ -199,6 +200,8 @@ cpufreq_dbs_governor_start cpufreq_dbs_governor_stop __cpufreq_driver_target + cpufreq_freq_attr_scaling_available_freqs + cpufreq_freq_attr_scaling_boost_freqs cpufreq_generic_attr cpufreq_generic_frequency_table_verify cpufreq_register_driver @@ -352,6 +355,7 @@ device_register device_remove_file device_rename + device_set_wakeup_capable device_set_wakeup_enable device_unregister _dev_info @@ -403,8 +407,10 @@ devm_phy_optional_get devm_pinctrl_get devm_pinctrl_put + devm_platform_ioremap_resource devm_pwm_get __devm_regmap_init_i2c + __devm_regmap_init_mmio_clk devm_regulator_bulk_get devm_regulator_get devm_regulator_register @@ -523,6 +529,7 @@ d_obtain_alias do_exit do_trace_netlink_extack + do_wait_intr down downgrade_write down_interruptible @@ -563,6 +570,7 @@ drm_atomic_helper_wait_for_fences drm_atomic_state_default_clear __drm_atomic_state_free + drm_bridge_add drm_compat_ioctl drm_connector_attach_encoder drm_connector_cleanup @@ -582,6 +590,11 @@ drm_dev_alloc drm_dev_register 
drm_display_mode_from_videomode + drm_dp_aux_init + drm_dp_channel_eq_ok + drm_dp_clock_recovery_ok + drm_dp_dpcd_read + drm_dp_dpcd_write drm_encoder_cleanup drm_encoder_init __drm_err @@ -994,11 +1007,14 @@ kmsg_dump_register kmsg_dump_rewind kmsg_dump_unregister + kobject_add kobject_create_and_add kobject_del + kobject_init kobject_init_and_add kobject_put kobject_uevent + kobj_sysfs_ops krealloc kstrdup kstrndup @@ -1013,13 +1029,18 @@ kstrtoul_from_user kstrtoull kthread_bind + kthread_cancel_work_sync kthread_create_on_node + kthread_flush_worker + __kthread_init_worker kthread_park kthread_parkme + kthread_queue_work kthread_should_park kthread_should_stop kthread_stop kthread_unpark + kthread_worker_fn ktime_get ktime_get_coarse_ts64 ktime_get_coarse_with_offset @@ -1208,6 +1229,7 @@ of_address_to_resource of_clk_add_provider of_clk_get + of_clk_get_by_name of_clk_get_from_provider of_clk_src_onecell_get of_count_phandle_with_args @@ -1382,6 +1404,7 @@ pm_runtime_force_resume pm_runtime_force_suspend __pm_runtime_idle + pm_runtime_no_callbacks __pm_runtime_resume pm_runtime_set_autosuspend_delay __pm_runtime_set_status @@ -1488,6 +1511,7 @@ __register_blkdev __register_chrdev register_chrdev_region + register_die_notifier register_filesystem register_inet6addr_notifier register_inetaddr_notifier @@ -1569,6 +1593,7 @@ rpmsg_create_ept rpmsg_find_device rpmsg_register_device + rpmsg_register_device_override rpmsg_release_channel rpmsg_send rpmsg_sendto @@ -1759,6 +1784,7 @@ snd_ctl_notify snd_devm_card_new snd_ecards_limit + snd_hwdep_new snd_info_get_line snd_interval_refine snd_pcm_format_big_endian @@ -1871,6 +1897,7 @@ strncat strncmp strncpy + strndup_user strnlen strnstr strpbrk @@ -2015,6 +2042,7 @@ tty_termios_baud_rate tty_termios_copy_hw tty_termios_encode_baud_rate + tty_termios_hw_change tty_unregister_device tty_unregister_driver tty_unregister_ldisc @@ -2041,6 +2069,7 @@ unregister_blkdev __unregister_chrdev unregister_chrdev_region + 
unregister_die_notifier unregister_filesystem unregister_inet6addr_notifier unregister_inetaddr_notifier @@ -2131,6 +2160,13 @@ usb_role_switch_get_drvdata usb_role_switch_register usb_role_switch_unregister + usb_serial_deregister_drivers + usb_serial_generic_close + usb_serial_generic_get_icount + usb_serial_generic_open + usb_serial_generic_throttle + usb_serial_generic_unthrottle + usb_serial_register_drivers usb_set_interface usb_show_dynids usb_speed_string @@ -2281,6 +2317,7 @@ wait_woken __wake_up wake_up_bit + __wake_up_locked wake_up_process wakeup_source_add wakeup_source_create From 6b883cdac21f8a6826557c45f703368652eaba15 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Mar 2024 14:22:12 +0100 Subject: [PATCH 52/98] UPSTREAM: netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout commit 552705a3650bbf46a22b1adedc1b04181490fc36 upstream. While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. 
Bug: 329205787 Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 406b0241d0eb598a0b330ab20ae325537d8d8163) Signed-off-by: Lee Jones Change-Id: I6170493c267e020c50a739150f8c421deb635b35 --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 520bd64144d6..2285548f0292 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5055,6 +5055,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); From ea419cda5cbbff97648e1434ef532f8d36879f30 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 28 Mar 2024 13:27:36 +0100 Subject: [PATCH 53/98] UPSTREAM: netfilter: nf_tables: release batch on table validation from abort path commit a45e6889575c2067d3c0212b6bc1022891e65b91 upstream. Unlike early commit path stage which triggers a call to abort, an explicit release of the batch is required on abort, otherwise mutex is released and commit_list remains in place. Add WARN_ON_ONCE to ensure commit_list is empty from the abort path before releasing the mutex. After this patch, commit_list is always assumed to be empty before grabbing the mutex, therefore 03c1f1ef1584 ("netfilter: Cleanup nft_net->module_list from nf_tables_exit_net()") only needs to release the pending modules for registration. 
Bug: 332996726 Cc: stable@vger.kernel.org Fixes: c0391b6ab810 ("netfilter: nf_tables: missing validation from the abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b0b36dcbe0f24383612e5e62bd48df5a8107f7fc) Signed-off-by: Lee Jones Change-Id: I38f9b05ac4eadd1d2b7b306cccaf0aeacb61b57a --- net/netfilter/nf_tables_api.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2285548f0292..17c34afa6779 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9680,10 +9680,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -9859,7 +9860,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) else nf_tables_module_autoload_cleanup(net); - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -9873,6 +9874,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -10674,9 +10677,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); From ceb8c595f8072bc2af914930ac659fb3c6f63e12 Mon Sep 17 00:00:00 2001 From: 
Pablo Neira Ayuso Date: Thu, 28 Mar 2024 14:23:55 +0100 Subject: [PATCH 54/98] UPSTREAM: netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path commit 0d459e2ffb541841714839e8228b845458ed3b27 upstream. The commit mutex should not be released during the critical section between nft_gc_seq_begin() and nft_gc_seq_end(), otherwise, async GC worker could collect expired objects and get the released commit lock within the same GC sequence. nf_tables_module_autoload() temporarily releases the mutex to load module dependencies, then it goes back to replay the transaction again. Move it at the end of the abort phase after nft_gc_seq_end() is called. Bug: 332996726 Cc: stable@vger.kernel.org Fixes: 720344340fb9 ("netfilter: nf_tables: GC transaction race with abort path") Reported-by: Kuan-Ting Chen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 8038ee3c3e5b59bcd78467686db5270c68544e30) Signed-off-by: Lee Jones Change-Id: I637389421d8eca5ab59a41bd1a4b70432440034c --- net/netfilter/nf_tables_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 17c34afa6779..165524d06995 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9855,11 +9855,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - return err; } @@ -9876,6 +9871,14 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. 
+ */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; From 9274c308d8469e26a5f3fb8b598000edcff6b135 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Fri, 5 Apr 2024 11:00:40 -0700 Subject: [PATCH 55/98] ANDROID: 16K: Introduce /sys/kernel/mm/pgsize_miration/enabled Migrating from 4kB to 16kB page-size in Android requires first making the platform page-agnostic, which involves increasing Android-ELFs' max-page-size (p_align) from 4kB to 16kB. Increasing the ELF max-page-size was found to cause compatibility issues in apps that use obfuscation or depend on the ELF segments being mapped based on 4kB-alignment. Working around these compatibility issues involves both kernel and userspace (dynamic linker) changes. Introduce a knob for userspace (dynamic linker) to determine whether the kernel supports the mitigations needed for page-size migration compatibility. The knob also allows for userspace to turn on or off these mitigations by writing 1 or 0 to /sys/kernel/mm/pgsize_miration/enabled: echo 1 > /sys/kernel/mm//pgsize_miration/enabled # Enable echo 0 > /sys/kernel/mm//pgsize_miration/enabled # Disable Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I9ac1d15d397b8226b27827ecffa30502da91e10e Signed-off-by: Kalesh Singh --- mm/Makefile | 2 +- mm/pgsize_migration.c | 105 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 mm/pgsize_migration.c diff --git a/mm/Makefile b/mm/Makefile index 8a9954121e4d..a17ebb357dcb 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -52,7 +52,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ - debug.o gup.o mmap_lock.o $(mmu-y) + debug.o gup.o mmap_lock.o pgsize_migration.o $(mmu-y) # Give 'page_alloc' its own 
module-parameter namespace page-alloc-y := page_alloc.o diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c new file mode 100644 index 000000000000..e840cda99e22 --- /dev/null +++ b/mm/pgsize_migration.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Page Size Migration + * + * This file contains the core logic of mitigations to ensure + * app compatibility during the transition from 4kB to 16kB + * page size in Android. + * + * Copyright (c) 2024, Google LLC. + * Author: Kalesh Singh + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_64BIT +#if PAGE_SIZE == SZ_4K +DEFINE_STATIC_KEY_TRUE(pgsize_migration_enabled); + +#define is_pgsize_migration_enabled() (static_branch_likely(&pgsize_migration_enabled)) +#else /* PAGE_SIZE != SZ_4K */ +DEFINE_STATIC_KEY_FALSE(pgsize_migration_enabled); + +#define is_pgsize_migration_enabled() (static_branch_unlikely(&pgsize_migration_enabled)) +#endif /* PAGE_SIZE == SZ_4K */ + +static ssize_t show_pgsize_migration_enabled(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + if (is_pgsize_migration_enabled()) + return sprintf(buf, "%d\n", 1); + else + return sprintf(buf, "%d\n", 0); +} + +static ssize_t store_pgsize_migration_enabled(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + /* Migration is only applicable to 4kB kernels */ + if (PAGE_SIZE != SZ_4K) + return n; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + if (val > 1) + return -EINVAL; + + if (val == 1) + static_branch_enable(&pgsize_migration_enabled); + else if (val == 0) + static_branch_disable(&pgsize_migration_enabled); + + return n; +} + +static struct kobj_attribute pgsize_migration_enabled_attr = __ATTR( + enabled, + 0644, + show_pgsize_migration_enabled, + store_pgsize_migration_enabled +); + +static struct attribute *pgsize_migration_attrs[] = { + &pgsize_migration_enabled_attr.attr, + NULL +}; + +static struct 
attribute_group pgsize_migration_attr_group = { + .name = "pgsize_migration", + .attrs = pgsize_migration_attrs, +}; + +/** + * What: /sys/kernel/mm/pgsize_migration/enabled + * Date: April 2024 + * KernelVersion: v5.4+ (GKI kernels) + * Contact: Kalesh Singh + * Description: /sys/kernel/mm/pgsize_migration/enabled + * allows for userspace to turn on or off page size + * migration mitigations necessary for app compatibility + * during Android's transition from 4kB to 16kB page size. + * Such mitigations include preserving /proc//[s]maps + * output as if there was no segment extension by the + * dynamic loader; and preventing fault around in the padding + * sections of ELF LOAD segment mappings. + * Users: Bionic's dynamic linker + */ +static int __init init_pgsize_migration(void) +{ + if (sysfs_create_group(mm_kobj, &pgsize_migration_attr_group)) + pr_err("pgsize_migration: failed to create sysfs group\n"); + + return 0; +}; +late_initcall(init_pgsize_migration); +#endif /* CONFIG_64BIT */ From 38cccb91549e0daf566f955e88d6e92f883c484c Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 22:21:32 -0700 Subject: [PATCH 56/98] ANDROID: 16K: Introduce ELF padding representation for VMAs The dynamic linker may extend ELF LOAD segment mappings to be contiguous in memory when loading a 16kB compatible ELF on a 4kB page-size system. This is done to reduce the use of unreclaimable VMA slab memory for the otherwise necessary "gap" VMAs. The extended portion of the mapping (VMA) can be viewed as "padding", meaning that the mapping in that range corresponds to an area of the file that does not contain contents of the respective segments (maybe zero's depending on how the ELF is built). For some compatibility mitigations, the region of a VMA corresponding to these padding sections need to be known. In order to represent such regions without adding addtional overhead or breaking ABI, some upper bits of vm_flags are used. 
Add the VMA padding pages representation and the necessary APIs to manipulate it. Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: Ieb9fa98e30ec9b0bec62256624f14e3ed6062a75 Signed-off-by: Kalesh Singh --- include/linux/pgsize_migration.h | 64 ++++++++++++++++++++++++++++++++ mm/pgsize_migration.c | 22 ++++++++++- 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 include/linux/pgsize_migration.h diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h new file mode 100644 index 000000000000..60f719d44107 --- /dev/null +++ b/include/linux/pgsize_migration.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PAGE_SIZE_MIGRATION_H +#define _LINUX_PAGE_SIZE_MIGRATION_H + +/* + * Page Size Migration + * + * Copyright (c) 2024, Google LLC. + * Author: Kalesh Singh + * + * This file contains the APIs for mitigations to ensure + * app compatibility during the transition from 4kB to 16kB + * page size in Android. + */ + +#include +#include + +/* + * vm_flags representation of VMA padding pages. + * + * This allows the kernel to identify the portion of an ELF LOAD segment VMA + * that is padding. + * + * 4 high bits of vm_flags [63,60] are used to represent ELF segment padding + * up to 60kB, which is sufficient for ELFs of both 16kB and 64kB segment + * alignment (p_align). + * + * The representation is illustrated below. 
+ * + * 63 62 61 60 + * _________ _________ _________ _________ + * | Bit 3 | Bit 2 | Bit 1 | Bit 0 | + * | of 4kB | of 4kB | of 4kB | of 4kB | + * | chunks | chunks | chunks | chunks | + * |_________|_________|_________|_________| + */ + +#define VM_PAD_WIDTH 4 +#define VM_PAD_SHIFT (BITS_PER_LONG - VM_PAD_WIDTH) +#define VM_TOTAL_PAD_PAGES ((1ULL << VM_PAD_WIDTH) - 1) + +#if PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) +extern void vma_set_pad_pages(struct vm_area_struct *vma, + unsigned long nr_pages); + +extern unsigned long vma_pad_pages(struct vm_area_struct *vma); +#else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ +static inline void vma_set_pad_pages(struct vm_area_struct *vma, + unsigned long nr_pages) +{ +} + +static inline unsigned long vma_pad_pages(struct vm_area_struct *vma) +{ + return 0; +} +#endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ + +static inline unsigned long vma_data_pages(struct vm_area_struct *vma) +{ + return vma_pages(vma) - vma_pad_pages(vma); +} +#endif /* _LINUX_PAGE_SIZE_MIGRATION_H */ diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index e840cda99e22..dda4ec802332 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -10,11 +10,12 @@ * Author: Kalesh Singh */ +#include + #include #include #include #include -#include #include #ifdef CONFIG_64BIT @@ -102,4 +103,23 @@ static int __init init_pgsize_migration(void) return 0; }; late_initcall(init_pgsize_migration); + +#if PAGE_SIZE == SZ_4K +void vma_set_pad_pages(struct vm_area_struct *vma, + unsigned long nr_pages) +{ + if (!is_pgsize_migration_enabled()) + return; + + vma->vm_flags |= (nr_pages << VM_PAD_SHIFT); +} + +unsigned long vma_pad_pages(struct vm_area_struct *vma) +{ + if (!is_pgsize_migration_enabled()) + return 0; + + return vma->vm_flags >> VM_PAD_SHIFT; +} +#endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From e7bff50b229b4658f2ab7e3649635da54d9fcdec Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 
22:21:32 -0700 Subject: [PATCH 57/98] ANDROID: 16K: Use MADV_DONTNEED to save VMA padding pages. When performing LOAD segment extension, the dynamic linker knows what portion of the VMA is padding. In order for the kernel to implement mitigations that ensure app compatibility, the extent of the padding must be made available to the kernel. To achieve this, reuse MADV_DONTNEED on single VMAs to hint the padding range to the kernel. This information is then stored in vm_flag bits. This allows userspace (dynamic linker) to set the padding pages on the VMA without a need for new out-of-tree UAPI. Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I3421de32ab38ad3cb0fbce73ecbd8f7314287cde Signed-off-by: Kalesh Singh --- include/linux/pgsize_migration.h | 8 +++++ mm/madvise.c | 3 ++ mm/pgsize_migration.c | 56 ++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index 60f719d44107..fd1e74ea4283 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -45,6 +45,9 @@ extern void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages); extern unsigned long vma_pad_pages(struct vm_area_struct *vma); + +extern void madvise_vma_pad_pages(struct vm_area_struct *vma, + unsigned long start, unsigned long end); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages) @@ -55,6 +58,11 @@ static inline unsigned long vma_pad_pages(struct vm_area_struct *vma) { return 0; } + +static inline void madvise_vma_pad_pages(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} #endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ static inline unsigned long vma_data_pages(struct vm_area_struct *vma) diff --git a/mm/madvise.c b/mm/madvise.c index 703b68381241..b365c15112b6 100644 --- a/mm/madvise.c +++ 
b/mm/madvise.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -788,6 +789,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + madvise_vma_pad_pages(vma, start, end); + zap_page_range(vma, start, end - start); return 0; } diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index dda4ec802332..59d4fbfb96da 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -121,5 +121,61 @@ unsigned long vma_pad_pages(struct vm_area_struct *vma) return vma->vm_flags >> VM_PAD_SHIFT; } + +static __always_inline bool str_has_suffix(const char *str, const char *suffix) +{ + size_t str_len = strlen(str); + size_t suffix_len = strlen(suffix); + + if (str_len < suffix_len) + return false; + + return !strncmp(str + str_len - suffix_len, suffix, suffix_len); +} + +/* + * Saves the number of padding pages for an ELF segment mapping + * in vm_flags. + * + * The number of padding pages is deduced from the madvise DONTNEED range [start, end) + * if the following conditions are met: + * 1) The range is enclosed by a single VMA + * 2) The range ends at the end address of the VMA + * 3) The range starts at an address greater than the start address of the VMA + * 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES. 
+ * 5) The VMA is a regular file backed VMA (filemap_fault) + * 6) The file backing the VMA is a shared library (*.so) + */ +void madvise_vma_pad_pages(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + unsigned long nr_pad_pages; + + if (!is_pgsize_migration_enabled()) + return; + + /* Only handle this for file backed VMAs */ + if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) + return; + + + /* Limit this to only shared libraries (*.so) */ + if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so")) + return; + + /* + * If the madvise range is it at the end of the file save the number of + * pages in vm_flags (only need 4 bits are needed for 16kB aligned ELFs). + */ + if (start <= vma->vm_start || end != vma->vm_end) + return; + + nr_pad_pages = (end - start) >> PAGE_SHIFT; + + if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES) + return; + + vma_set_pad_pages(vma, nr_pad_pages); +} #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From 37ea0e848513390d7ccda788702f19bc23a52e06 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 22:37:48 -0700 Subject: [PATCH 58/98] ANDROID: 16K: Exclude ELF padding for fault around range Userspace apps often analyze memory consumption by the use of mm rss_stat counters -- via the kmem/rss_stat trace event or from /proc//statm. rss_stat counters are only updated when the PTEs are updated. What this means is that pages can be present in the page cache from readahead but not visible to userspace (not attributed to the app) as there is no corresponding VMA (PTEs) for the respective page cache pages. A side effect of the loader now extending ELF LOAD segments to be contiguously mapped in the virtual address space, means that the VMA is extended to cover the padding pages. 
When filesystems, such as f2fs and ext4, that implement vm_ops->map_pages() attempt to perform a do_fault_around() the extent of the fault around is restricted by the area of the enclosing VMA. Since the loader extends LOAD segment VMAs to be contiguously mapped, the extent of the fault around is also increased. The result of which, is that the PTEs corresponding to the padding pages are updated and reflected in the rss_stat counters. It is not common that userspace application developers be aware of this nuance in the kernel's memory accounting. To avoid apparent regressions in memory usage to userspace, restrict the fault around range to only valid data pages (i.e. exclude the padding pages at the end of the VMA). Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I2c7a39ec1b040be2b9fb47801f95042f5dbf869d Signed-off-by: Kalesh Singh --- mm/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 37640dc59e60..4ee6c484e85d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -4461,7 +4462,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) end_pgoff = start_pgoff - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; - end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, + end_pgoff = min3(end_pgoff, vma_data_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && From 084d22016ccabe60338ea69b0a1c4c264551bd4c Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 23:02:30 -0700 Subject: [PATCH 59/98] ANDROID: 16K: Separate padding from ELF LOAD segment mappings In has been found that some in-field apps depend on the output of /proc/*/maps to determine the address ranges of other operations. 
With the extension of LOAD segments VMAs to be contiguous in memory, the apps may perform operations on an area that is not backed by the underlying file, which results in a SIGBUS. Other apps have crashed with yet unidentified reasons. To avoid breaking in-field apps, maintain the output of /proc/*/[s]maps with PROT_NONE VMAs for the padding pages of LOAD segments instead of showing the segment extensions. NOTE: This does not allocate actual backing VMAs for the shown PROT_NONE mappings. This approach maintains 2 possible assumptions that userspace (apps) could be depending on: 1) That LOAD segment mappings are "contiguous" (not separated by unrelated mappings) in memory. 2) That no virtual address space is available between mappings of consecutive LOAD segments for the same ELF. For example the output of /proc/*/[s]maps before and after this change is shown below. Segments maintain PROT_NONE gaps ("[page size compat]") for app compatibility but these are not backed by actual slab VMA memory. 
Maps Before: 7fb03604d000-7fb036051000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so 7fb036051000-7fb036055000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so 7fb036055000-7fb036059000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so 7fb036059000-7fb03605a000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Maps After: 7fc707390000-7fc707393000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so 7fc707393000-7fc707394000 ---p 00000000 00:00 0 [page size compat] 7fc707394000-7fc707398000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so 7fc707398000-7fc707399000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so 7fc707399000-7fc70739c000 ---p 00000000 00:00 0 [page size compat] 7fc70739c000-7fc70739d000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Smaps Before: 7fb03604d000-7fb036051000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 16 kB Pss: 0 kB Pss_Dirty: 0 kB Shared_Clean: 16 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 16 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me 7fb036051000-7fb036055000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 16 kB Pss: 0 kB Pss_Dirty: 0 kB Shared_Clean: 16 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 16 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd ex mr mw me 7fb036055000-7fb036059000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB 
Pss_Dirty: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me ac 7fb036059000-7fb03605a000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB Pss_Dirty: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac Smaps After: 7fc707390000-7fc707393000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 12 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 12 kB Pss: 0 kB Shared_Clean: 12 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 12 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me ?? 
7fc707393000-7fc707394000 ---p 00000000 00:00 0 [page size compat] Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 0 kB Pss: 0 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 0 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: mr mw me 7fc707394000-7fc707398000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 16 kB Pss: 0 kB Shared_Clean: 16 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 16 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd ex mr mw me 7fc707398000-7fc707399000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me ac ?? ?? 
7fc707399000-7fc70739c000 ---p 00000000 00:00 0 [page size compat] Size: 12 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 0 kB Pss: 0 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 0 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: mr mw me ac 7fc70739c000-7fc70739d000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I12bf2c106fafc74a500d79155b81dde5db42661e Signed-off-by: Kalesh Singh --- fs/proc/task_mmu.c | 14 ++++- include/linux/pgsize_migration.h | 29 ++++++++++ mm/pgsize_migration.c | 92 ++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 16a537a60c80..095aa7c80ee1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -344,7 +345,13 @@ done: static int show_map(struct seq_file *m, void *v) { - show_map_vma(m, v); + struct vm_area_struct *pad_vma = get_pad_vma(v); + struct vm_area_struct *vma = get_data_vma(v); + + show_map_vma(m, vma); + + show_map_pad_vma(vma, pad_vma, m, show_map_vma); + return 0; } @@ -838,7 +845,8 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, static int show_smap(struct seq_file *m, void *v) { - struct vm_area_struct *vma = v; + struct vm_area_struct *pad_vma = 
get_pad_vma(v); + struct vm_area_struct *vma = get_data_vma(v); struct mem_size_stats mss; memset(&mss, 0, sizeof(mss)); @@ -861,6 +869,8 @@ static int show_smap(struct seq_file *m, void *v) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); + show_map_pad_vma(vma, pad_vma, m, (show_pad_vma_fn)show_smap); + return 0; } diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index fd1e74ea4283..7ab0f288bcf9 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -14,6 +14,7 @@ */ #include +#include #include /* @@ -39,6 +40,10 @@ #define VM_PAD_WIDTH 4 #define VM_PAD_SHIFT (BITS_PER_LONG - VM_PAD_WIDTH) #define VM_TOTAL_PAD_PAGES ((1ULL << VM_PAD_WIDTH) - 1) +#define VM_PAD_MASK (VM_TOTAL_PAD_PAGES << VM_PAD_SHIFT) +#define VMA_PAD_START(vma) (vma->vm_end - (vma_pad_pages(vma) << PAGE_SHIFT)) + +typedef void (*show_pad_vma_fn)(struct seq_file *m, struct vm_area_struct *vma); #if PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) extern void vma_set_pad_pages(struct vm_area_struct *vma, @@ -48,6 +53,14 @@ extern unsigned long vma_pad_pages(struct vm_area_struct *vma); extern void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end); + +extern struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma); + +extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma); + +extern void show_map_pad_vma(struct vm_area_struct *vma, + struct vm_area_struct *pad, + struct seq_file *m, show_pad_vma_fn func); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages) @@ -63,6 +76,22 @@ static inline void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } + +static inline struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct vm_area_struct *get_data_vma(struct 
vm_area_struct *vma) +{ + return vma; +} + +static inline void show_map_pad_vma(struct vm_area_struct *vma, + struct vm_area_struct *pad, + struct seq_file *m, show_pad_vma_fn func) +{ +} #endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ static inline unsigned long vma_data_pages(struct vm_area_struct *vma) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index 59d4fbfb96da..b7264f49a9cb 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #ifdef CONFIG_64BIT @@ -177,5 +178,96 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, vma_set_pad_pages(vma, nr_pad_pages); } + +static const char *pad_vma_name(struct vm_area_struct *vma) +{ + return "[page size compat]"; +} + +static const struct vm_operations_struct pad_vma_ops = { + .name = pad_vma_name, +}; + +/* + * Returns a new VMA representing the padding in @vma, if no padding + * in @vma returns NULL. + */ +struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) +{ + struct vm_area_struct *pad; + + if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK)) + return NULL; + + pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + + *pad = *vma; + + /* Remove file */ + pad->vm_file = NULL; + + /* Add vm_ops->name */ + pad->vm_ops = &pad_vma_ops; + + /* Adjust the start to begin at the start of the padding section */ + pad->vm_start = VMA_PAD_START(pad); + + /* Make the pad vma PROT_NONE */ + pad->vm_flags = pad->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); + + /* Remove padding bits */ + pad->vm_flags = pad->vm_flags & ~VM_PAD_MASK; + + return pad; +} + +/* + * Returns a new VMA exclusing the padding from @vma; if no padding in + * @vma returns @vma. 
+ */ +struct vm_area_struct *get_data_vma(struct vm_area_struct *vma) +{ + struct vm_area_struct *data; + + if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK)) + return vma; + + data = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + + *data = *vma; + + /* Adjust the end to the start of the padding section */ + data->vm_end = VMA_PAD_START(data); + + return data; +} + +/* + * Calls the show_pad_vma_fn on the @pad VMA, and frees the copies of @vma + * and @pad. + */ +void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, + struct seq_file *m, show_pad_vma_fn func) +{ + if (!pad) + return; + + /* + * This cannot happen. If @pad vma was allocated the corresponding + * @vma should have the VM_PAD_MASK bit(s) set. + */ + BUG_ON(!(vma->vm_flags & VM_PAD_MASK)); + + /* + * This cannot happen. @pad is a section of the original VMA. + * Therefore @vma cannot be null if @pad is not null. + */ + BUG_ON(!vma); + + func(m, pad); + + kfree(pad); + kfree(vma); +} #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From 264477e0d844ee272cbf7d1bc277705b2f7232ad Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Fri, 12 Apr 2024 14:32:14 +0000 Subject: [PATCH 60/98] ANDROID: Update the ABI symbol list Adding the following symbols: - iov_iter_kvec - seq_read_iter 1 function symbol(s) added 'ssize_t seq_read_iter(struct kiocb*, struct iov_iter*)' Bug: 332885803 Change-Id: I4068f8a28395deee9a7bcd1cccf786cdd169f0c1 Signed-off-by: Varad Gautam --- android/abi_gki_aarch64.stg | 10 ++++++++++ android/abi_gki_aarch64_pixel | 2 ++ 2 files changed, 12 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index a1c1af173972..97fca3ab154c 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -350351,6 +350351,15 @@ elf_symbol { type_id: 0x12e4741f full_name: "seq_read" } +elf_symbol { + id: 0x8ad22c43 + name: "seq_read_iter" + is_defined: true + symbol_type: FUNCTION + crc: 0x3bf0fbdb + 
type_id: 0x16637235 + full_name: "seq_read_iter" +} elf_symbol { id: 0x91763ae6 name: "seq_release" @@ -368839,6 +368848,7 @@ interface { symbol_id: 0x25bebf3b symbol_id: 0x59b4ca07 symbol_id: 0xba8007cd + symbol_id: 0x8ad22c43 symbol_id: 0x91763ae6 symbol_id: 0x2cc9ecc6 symbol_id: 0x56c495a4 diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index beed11452de5..96052847087d 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -1106,6 +1106,7 @@ io_schedule_timeout iounmap iov_iter_bvec + iov_iter_kvec ip_send_check iput __irq_alloc_descs @@ -1837,6 +1838,7 @@ seq_putc seq_puts seq_read + seq_read_iter seq_release seq_release_private seq_write From 25ebc0917814352a3a25edb0b1c0a1725d4cae55 Mon Sep 17 00:00:00 2001 From: Oven Date: Wed, 10 Apr 2024 19:23:08 +0800 Subject: [PATCH 61/98] ANDROID: mm: fix incorrect unlock mmap_lock for speculative swap fault In a20b68c396127cd6387f37845c5bc05e44e2fd0e, SPF is supported for swap fault. But in __lock_page_or_retry(), it will unlock mmap_lock unconditionally. That will cause unpaired lock release in handling SPF. 
Bug: 333508035 Change-Id: Ia1da66c85e0d58883cf518f10cd33fc5cad387b8 Signed-off-by: Oven (cherry picked from commit 63070883166ae63620a87d958319deba86f236ae) --- mm/filemap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index c659d7bf7a81..7b17a22943aa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1710,7 +1710,8 @@ __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm, if (flags & FAULT_FLAG_RETRY_NOWAIT) return 0; - mmap_read_unlock(mm); + if (!(flags & FAULT_FLAG_SPECULATIVE)) + mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) wait_on_page_locked_killable(page); else @@ -1722,7 +1723,8 @@ __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm, ret = __lock_page_killable(page); if (ret) { - mmap_read_unlock(mm); + if (!(flags & FAULT_FLAG_SPECULATIVE)) + mmap_read_unlock(mm); return 0; } } else { From aa07d6b28d2ce69654e8da4e73fa7868dc7ce604 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 09:43:13 -0700 Subject: [PATCH 62/98] ANDROID: scsi: ufs: Unexport ufshcd_mcq_poll_cqe_nolock() Unexport this function because it is not used outside the UFSHCI core driver and because it is not possible to use this function from outside the UFSHCI core driver without triggering a race condition. 
Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Change-Id: I1bb504b0310c3618db94e9401ff4f7e13633d6a0 Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufs-mcq.c | 1 - include/ufs/ufshcd.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 202ff71e1b58..cf06702e1b8f 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -294,7 +294,6 @@ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, return completed_reqs; } -EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, struct ufs_hw_queue *hwq) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index b381daaac32c..91f11f37b390 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1298,8 +1298,6 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); void ufshcd_schedule_eh_work(struct ufs_hba *hba); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); -unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq); void ufshcd_mcq_enable_esi(struct ufs_hba *hba); void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg); From 0fcd7a1c7ca498527c2a56f0413594a797e451ac Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 15:49:11 -0700 Subject: [PATCH 63/98] BACKPORT: FROMLIST: scsi: ufs: Make ufshcd_poll() complain about unsupported arguments The ufshcd_poll() implementation does not support queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT in MCQ mode. Hence complain if queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT in MCQ mode. 
Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Link: https://lore.kernel.org/linux-scsi/20240416171357.1062583-1-bvanassche@acm.org/T/#mf141ffd0528e062eccaceb98f326abae709da3c1 Change-Id: I4182872aa86ed84f074a3f11364138cfde19e74b Signed-off-by: Bart Van Assche Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufshcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index fdf196d70658..fb9d88e5778a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5540,6 +5540,7 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) struct ufs_hw_queue *hwq; if (is_mcq_enabled(hba)) { + WARN_ON_ONCE(queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT); hwq = &hba->uhq[queue_num + UFSHCD_MCQ_IO_QUEUE_OFFSET]; return ufshcd_mcq_poll_cqe_lock(hba, hwq); From 8563ce58954679187f0c269a504520d8def37e41 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 15:49:25 -0700 Subject: [PATCH 64/98] BACKPORT: FROMLIST: scsi: ufs: Make the polling code report which command has been completed Prepare for introducing a new __ufshcd_poll() caller that will need to know whether or not a specific command has been completed. 
Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Link: https://lore.kernel.org/linux-scsi/20240416171357.1062583-1-bvanassche@acm.org/T/#m68901e4f4e2437e7d0cb747049006ab19f57e038 Change-Id: I1b25b095b4bf9fbf175aa963ec85fcbbcb2be0ed Signed-off-by: Bart Van Assche Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufs-mcq.c | 22 ++++++++++++++------- drivers/ufs/core/ufshcd-priv.h | 10 ++++++---- drivers/ufs/core/ufshcd.c | 36 ++++++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index cf06702e1b8f..693a7da672a0 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -268,23 +268,29 @@ static int ufshcd_mcq_get_tag(struct ufs_hba *hba, return div_u64(addr, sizeof(struct utp_transfer_cmd_desc)); } -static void ufshcd_mcq_process_cqe(struct ufs_hba *hba, - struct ufs_hw_queue *hwq) +/* Returns true if and only if @compl_cmd has been completed. */ +static bool ufshcd_mcq_process_cqe(struct ufs_hba *hba, + struct ufs_hw_queue *hwq, + struct scsi_cmnd *compl_cmd) { struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq); int tag = ufshcd_mcq_get_tag(hba, hwq, cqe); - ufshcd_compl_one_cqe(hba, tag, cqe); + return ufshcd_compl_one_cqe(hba, tag, cqe, compl_cmd); } +/* Clears *@compl_cmd if and only if *@compl_cmd has been completed. */ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq) + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd) { unsigned long completed_reqs = 0; ufshcd_mcq_update_cq_tail_slot(hwq); while (!ufshcd_mcq_is_cq_empty(hwq)) { - ufshcd_mcq_process_cqe(hba, hwq); + if (ufshcd_mcq_process_cqe(hba, hwq, + compl_cmd ? *compl_cmd : NULL)) + *compl_cmd = NULL; ufshcd_mcq_inc_cq_head_slot(hwq); completed_reqs++; } @@ -295,13 +301,15 @@ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, return completed_reqs; } +/* Clears *@compl_cmd if and only if *@compl_cmd has been completed. 
*/ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq) + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd) { unsigned long completed_reqs, flags; spin_lock_irqsave(&hwq->cq_lock, flags); - completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq); + completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq, compl_cmd); spin_unlock_irqrestore(&hwq->cq_lock, flags); return completed_reqs; diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 76e2d15ff698..8fd8156ce40a 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -56,8 +56,8 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res); void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit); -void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, - struct cq_entry *cqe); +bool ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, + struct cq_entry *cqe, struct scsi_cmnd *compl_cmd); int ufshcd_mcq_init(struct ufs_hba *hba); int ufshcd_mcq_decide_queue_depth(struct ufs_hba *hba); int ufshcd_mcq_memory_alloc(struct ufs_hba *hba); @@ -67,11 +67,13 @@ void ufshcd_mcq_select_mcq_mode(struct ufs_hba *hba); u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq); + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd); struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, struct request *req); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq); + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd); #define UFSHCD_MCQ_IO_QUEUE_OFFSET 1 #define SD_ASCII_STD true diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index fb9d88e5778a..320986753b13 100644 --- 
a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5463,9 +5463,12 @@ static void ufshcd_release_scsi_cmd(struct ufs_hba *hba, * @hba: per adapter instance * @task_tag: the task tag of the request to be completed * @cqe: pointer to the completion queue entry + * @compl_cmd: if not NULL, check whether this command has been completed + * + * Returns: true if and only if @compl_cmd has been completed. */ -void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, - struct cq_entry *cqe) +bool ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, + struct cq_entry *cqe, struct scsi_cmnd *compl_cmd) { struct ufshcd_lrb *lrbp; struct scsi_cmnd *cmd; @@ -5482,6 +5485,7 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_release_scsi_cmd(hba, lrbp); /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); + return cmd == compl_cmd; } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE || lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) { if (hba->dev_cmd.complete) { @@ -5492,20 +5496,26 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_clk_scaling_update_busy(hba); } } + return false; } /** * __ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance * @completed_reqs: bitmask that indicates which requests to complete + * @compl_cmd: if not NULL, check whether *@compl_cmd has been completed. + * Clear *@compl_cmd if it has been completed. */ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, - unsigned long completed_reqs) + unsigned long completed_reqs, + struct scsi_cmnd **compl_cmd) { int tag; for_each_set_bit(tag, &completed_reqs, hba->nutrs) - ufshcd_compl_one_cqe(hba, tag, NULL); + if (ufshcd_compl_one_cqe(hba, tag, NULL, + compl_cmd ? *compl_cmd : NULL)) + *compl_cmd = NULL; } /* Any value that is not an existing queue number is fine for this constant. 
*/ @@ -5532,7 +5542,8 @@ static void ufshcd_clear_polled(struct ufs_hba *hba, * Returns > 0 if one or more commands have been completed or 0 if no * requests have been completed. */ -static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) +static int __ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num, + struct scsi_cmnd **compl_cmd) { struct ufs_hba *hba = shost_priv(shost); unsigned long completed_reqs, flags; @@ -5543,7 +5554,7 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) WARN_ON_ONCE(queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT); hwq = &hba->uhq[queue_num + UFSHCD_MCQ_IO_QUEUE_OFFSET]; - return ufshcd_mcq_poll_cqe_lock(hba, hwq); + return ufshcd_mcq_poll_cqe_lock(hba, hwq, compl_cmd); } spin_lock_irqsave(&hba->outstanding_lock, flags); @@ -5560,11 +5571,16 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (completed_reqs) - __ufshcd_transfer_req_compl(hba, completed_reqs); + __ufshcd_transfer_req_compl(hba, completed_reqs, compl_cmd); return completed_reqs != 0; } +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) +{ + return __ufshcd_poll(shost, queue_num, NULL); +} + /** * ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance @@ -6820,7 +6836,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba) ufshcd_mcq_write_cqis(hba, events, i); if (events & UFSHCD_MCQ_CQIS_TAIL_ENT_PUSH_STS) - ufshcd_mcq_poll_cqe_nolock(hba, hwq); + ufshcd_mcq_poll_cqe_nolock(hba, hwq, NULL); } return IRQ_HANDLED; @@ -7361,7 +7377,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) dev_err(hba->dev, "%s: failed to clear requests %#lx\n", __func__, not_cleared); } - __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared); + __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared, NULL); out: hba->req_abort_count = 0; @@ -7522,7 +7538,7 
@@ static int ufshcd_abort(struct scsi_cmnd *cmd) dev_err(hba->dev, "%s: cmd was completed, but without a notifying intr, tag = %d", __func__, tag); - __ufshcd_transfer_req_compl(hba, 1UL << tag); + __ufshcd_transfer_req_compl(hba, 1UL << tag, NULL); goto release; } From 5725caa2965e7f68b5145272c12ac0bbe99b49e4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 15:49:39 -0700 Subject: [PATCH 65/98] FROMLIST: scsi: ufs: Check for completion from the timeout handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ufshcd_abort() returns SUCCESS for an already completed command then that command is completed twice. This results in a crash. Prevent this by checking whether a command has completed without completion interrupt from the timeout handler. This CL fixes the following kernel crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace:  dma_direct_map_sg+0x70/0x274  scsi_dma_map+0x84/0x124  ufshcd_queuecommand+0x3fc/0x880  scsi_queue_rq+0x7d0/0x111c  blk_mq_dispatch_rq_list+0x440/0xebc  blk_mq_do_dispatch_sched+0x5a4/0x6b8  __blk_mq_sched_dispatch_requests+0x150/0x220  __blk_mq_run_hw_queue+0xf0/0x218  __blk_mq_delay_run_hw_queue+0x8c/0x18c  blk_mq_run_hw_queue+0x1a4/0x360  blk_mq_sched_insert_requests+0x130/0x334  blk_mq_flush_plug_list+0x138/0x234  blk_flush_plug_list+0x118/0x164  blk_finish_plug()  read_pages+0x38c/0x408  page_cache_ra_unbounded+0x230/0x2f8  do_sync_mmap_readahead+0x1a4/0x208  filemap_fault+0x27c/0x8f4  f2fs_filemap_fault+0x28/0xfc  __do_fault+0xc4/0x208  handle_pte_fault+0x290/0xe04  do_handle_mm_fault+0x52c/0x858  do_page_fault+0x5dc/0x798  do_translation_fault+0x40/0x54  do_mem_abort+0x60/0x134  el0_da+0x40/0xb8  el0t_64_sync_handler+0xc4/0xe4  el0t_64_sync+0x1b4/0x1b8 Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Link: 
https://lore.kernel.org/linux-scsi/20240416171357.1062583-1-bvanassche@acm.org/T/#mbfa6b7a56e07c792ddca7801fb8900f8370d4731 Change-Id: I48e93516d2aae3b2ad62b0b51144e8e2e39d7476 Signed-off-by: Bart Van Assche Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufshcd.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 320986753b13..bdc1b3d34adb 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8781,6 +8781,25 @@ out: static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) { struct ufs_hba *hba = shost_priv(scmd->device->host); + struct scsi_cmnd *cmd2 = scmd; + + WARN_ON_ONCE(!scmd); + + if (is_mcq_enabled(hba)) { + struct request *rq = scsi_cmd_to_rq(scmd); + struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq); + + ufshcd_mcq_poll_cqe_lock(hba, hwq, &cmd2); + } else { + __ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT, + &cmd2); + } + if (cmd2 == NULL) { + sdev_printk(KERN_INFO, scmd->device, + "%s: cmd with tag %#x has already been completed\n", + __func__, blk_mq_unique_tag(scsi_cmd_to_rq(scmd))); + return SCSI_EH_DONE; + } if (!hba->system_suspending) { /* Activate the error handler in the SCSI core. */ From 65e0a92c6d27d4cbaa0deef668df12b69853d65e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: [PATCH 66/98] UPSTREAM: af_unix: Do not use atomic ops for unix_sk(sk)->inflight. [ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ] When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. 
Bug: 336226035 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Signed-off-by: Sasha Levin (cherry picked from commit 301fdbaa0bba4653570f07789909939f977a7620) Signed-off-by: Lee Jones Change-Id: I0d965d5f2a863d798c06de9f21d0467f256b538e --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 17 ++++++++--------- net/unix/scm.c | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7d142e8a0550..01f3aec7128c 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,7 +62,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f079a5bd909d..23bfe5d22a9a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -877,11 +877,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc2763540393..312474c23e9a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - 
atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -237,14 +238,12 @@ void unix_gc(void) */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; - long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); @@ -271,7 +270,7 @@ void unix_gc(void) /* Move cursor to after the current position. 
*/ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, ¬_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index e8e2a00bb0f5..bdcda4e41f10 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -54,12 +54,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -76,10 +77,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); From e8e652b8c81afc06e9bc801f6eb4896516be2d62 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 9 Apr 2024 22:09:39 +0200 Subject: [PATCH 67/98] UPSTREAM: af_unix: Fix garbage collector racing against connect() [ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ] Garbage collector does not take into account the risk of embryo getting enqueued during the garbage collection. If such embryo has a peer that carries SCM_RIGHTS, two consecutive passes of scan_children() may see a different set of children. Leading to an incorrectly elevated inflight count, and then a dangling pointer within the gc_inflight_list. 
sockets are AF_UNIX/SOCK_STREAM S is an unconnected socket L is a listening in-flight socket bound to addr, not in fdtable V's fd will be passed via sendmsg(), gets inflight count bumped connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc() ---------------- ------------------------- ----------- NS = unix_create1() skb1 = sock_wmalloc(NS) L = unix_find_other(addr) unix_state_lock(L) unix_peer(S) = NS // V count=1 inflight=0 NS = unix_peer(S) skb2 = sock_alloc() skb_queue_tail(NS, skb2[V]) // V became in-flight // V count=2 inflight=1 close(V) // V count=1 inflight=1 // GC candidate condition met for u in gc_inflight_list: if (total_refs == inflight_refs) add u to gc_candidates // gc_candidates={L, V} for u in gc_candidates: scan_children(u, dec_inflight) // embryo (skb1) was not // reachable from L yet, so V's // inflight remains unchanged __skb_queue_tail(L, skb1) unix_state_unlock(L) for u in gc_candidates: if (u.inflight) scan_children(u, inc_inflight_move_tail) // V count=1 inflight=2 (!) If there is a GC-candidate listening socket, lock/unlock its state. This makes GC wait until the end of any ongoing connect() to that socket. After flipping the lock, a possibly SCM-laden embryo is already enqueued. And if there is another embryo coming, it can not possibly carry SCM_RIGHTS. At this point, unix_inflight() can not happen because unix_gc_lock is already taken. Inflight graph remains unaffected. 
Bug: 336226035 Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.") Signed-off-by: Michal Luczaj Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 507cc232ffe53a352847893f8177d276c3b532a9) Signed-off-by: Lee Jones Change-Id: If321f78b8b3220f5a1caea4b5e9450f1235b0770 --- net/unix/garbage.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 312474c23e9a..67b2c3bfa113 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -235,11 +235,22 @@ void unix_gc(void) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. 
*/ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - total_refs = file_count(u->sk.sk_socket->file); + total_refs = file_count(sk->sk_socket->file); BUG_ON(!u->inflight); BUG_ON(total_refs < u->inflight); @@ -247,6 +258,11 @@ void unix_gc(void) list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock(sk); + unix_state_unlock(sk); + } } } From a02278f9908eff05b0d6e3988ab2cee076b0f770 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:26:59 +0100 Subject: [PATCH 68/98] FROMGIT: coresight: etm4x: Do not hardcode IOMEM access for register restore When we restore the register state for ETM4x, while coming back from CPU idle, we hardcode IOMEM access. This is wrong and could blow up for an ETM with system instructions access (and for ETE). Fixes: f5bd523690d2 ("coresight: etm4x: Convert all register accesses") Reported-by: Yabin Cui Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Link: https://lore.kernel.org/r/20240412142702.2882478-2-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit 1e7ba33fa591de1cf60afffcabb45600b3607025 https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: Id2ea066374933de51a90f1fca8304338b741845d Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index e7f32b655dde..b40d56c9203f 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1745,8 +1745,10 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) { int i; struct etmv4_save_state *state = drvdata->save_state; - struct csdev_access 
tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base); - struct csdev_access *csa = &tmp_csa; + struct csdev_access *csa = &drvdata->csdev->access; + + if (WARN_ON(!drvdata->csdev)) + return; etm4_cs_unlock(drvdata, csa); etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET); From 6a08c9fb9d47ecd90d4f2401d9ba0e3f447614f0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:27:00 +0100 Subject: [PATCH 69/98] FROMGIT: coresight: etm4x: Do not save/restore Data trace control registers ETM4x doesn't support Data trace on A class CPUs. As such do not access the Data trace control registers during CPU idle. This could cause problems for ETE. While at it, remove all references to the Data trace control registers. Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Reported-by: Yabin Cui Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Link: https://lore.kernel.org/r/20240412142702.2882478-3-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit 5eb3a0c2c52368cb9902e9a6ea04888e093c487d https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: I06977d86aa2d876d166db0fac8fbccf48fd07229 Signed-off-by: Yabin Cui --- .../coresight/coresight-etm4x-core.c | 6 ---- drivers/hwtracing/coresight/coresight-etm4x.h | 28 ------------------- 2 files changed, 34 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index b40d56c9203f..248c63e0e15b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1641,9 +1641,6 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR); if (drvdata->nr_pe_cmp) state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR); - state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR); - state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR); - 
state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i)); @@ -1774,9 +1771,6 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trcvissctlr, TRCVISSCTLR); if (drvdata->nr_pe_cmp) etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR); - etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR); - etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR); - etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i)); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 1170fbe04d06..32daf11ec856 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -43,9 +43,6 @@ #define TRCVIIECTLR 0x084 #define TRCVISSCTLR 0x088 #define TRCVIPCSSCTLR 0x08C -#define TRCVDCTLR 0x0A0 -#define TRCVDSACCTLR 0x0A4 -#define TRCVDARCCTLR 0x0A8 /* Derived resources registers */ #define TRCSEQEVRn(n) (0x100 + (n * 4)) /* n = 0-2 */ #define TRCSEQRSTEVR 0x118 @@ -90,9 +87,6 @@ /* Address Comparator registers n = 0-15 */ #define TRCACVRn(n) (0x400 + (n * 8)) #define TRCACATRn(n) (0x480 + (n * 8)) -/* Data Value Comparator Value registers, n = 0-7 */ -#define TRCDVCVRn(n) (0x500 + (n * 16)) -#define TRCDVCMRn(n) (0x580 + (n * 16)) /* ContextID/Virtual ContextID comparators, n = 0-7 */ #define TRCCIDCVRn(n) (0x600 + (n * 8)) #define TRCVMIDCVRn(n) (0x640 + (n * 8)) @@ -174,9 +168,6 @@ /* List of registers accessible via System instructions */ #define ETM4x_ONLY_SYSREG_LIST(op, val) \ CASE_##op((val), TRCPROCSELR) \ - CASE_##op((val), TRCVDCTLR) \ - CASE_##op((val), TRCVDSACCTLR) \ - CASE_##op((val), TRCVDARCCTLR) \ CASE_##op((val), TRCOSLAR) #define ETM_COMMON_SYSREG_LIST(op, val) \ @@ -324,22 +315,6 @@ 
CASE_##op((val), TRCACATRn(13)) \ CASE_##op((val), TRCACATRn(14)) \ CASE_##op((val), TRCACATRn(15)) \ - CASE_##op((val), TRCDVCVRn(0)) \ - CASE_##op((val), TRCDVCVRn(1)) \ - CASE_##op((val), TRCDVCVRn(2)) \ - CASE_##op((val), TRCDVCVRn(3)) \ - CASE_##op((val), TRCDVCVRn(4)) \ - CASE_##op((val), TRCDVCVRn(5)) \ - CASE_##op((val), TRCDVCVRn(6)) \ - CASE_##op((val), TRCDVCVRn(7)) \ - CASE_##op((val), TRCDVCMRn(0)) \ - CASE_##op((val), TRCDVCMRn(1)) \ - CASE_##op((val), TRCDVCMRn(2)) \ - CASE_##op((val), TRCDVCMRn(3)) \ - CASE_##op((val), TRCDVCMRn(4)) \ - CASE_##op((val), TRCDVCMRn(5)) \ - CASE_##op((val), TRCDVCMRn(6)) \ - CASE_##op((val), TRCDVCMRn(7)) \ CASE_##op((val), TRCCIDCVRn(0)) \ CASE_##op((val), TRCCIDCVRn(1)) \ CASE_##op((val), TRCCIDCVRn(2)) \ @@ -821,9 +796,6 @@ struct etmv4_save_state { u32 trcviiectlr; u32 trcvissctlr; u32 trcvipcssctlr; - u32 trcvdctlr; - u32 trcvdsacctlr; - u32 trcvdarcctlr; u32 trcseqevr[ETM_MAX_SEQ_STATES]; u32 trcseqrstevr; From 8ba180228744f31ed67611eae51d86c62a1f7bae Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:27:01 +0100 Subject: [PATCH 70/98] BACKPORT: FROMGIT: coresight: etm4x: Safe access for TRCQCLTR ETM4x implements TRCQCLTR only when the Q elements are supported and the Q element filtering is supported (TRCIDR0.QFILT). Access to the register otherwise could be fatal. Fix this by tracking the availability, like the others. 
Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Reported-by: Yabin Cui Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Link: https://lore.kernel.org/r/20240412142702.2882478-4-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit 46bf8d7cd8530eca607379033b9bc4ac5590a0cd https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: Id848fa14ba8003149f76b5ca54562593f6164150 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 8 ++++++-- drivers/hwtracing/coresight/coresight-etm4x.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 248c63e0e15b..44c785c296ee 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1113,6 +1113,8 @@ static void etm4_init_arch_data(void *info) drvdata->nr_event = BMVAL(etmidr0, 10, 11); /* QSUPP, bits[16:15] Q element support field */ drvdata->q_support = BMVAL(etmidr0, 15, 16); + if (drvdata->q_support) + drvdata->q_filt = !!(etmidr0 & TRCIDR0_QFILT); /* TSSIZE, bits[28:24] Global timestamp size field */ drvdata->ts_size = BMVAL(etmidr0, 24, 28); @@ -1634,7 +1636,8 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcccctlr = etm4x_read32(csa, TRCCCCTLR); state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR); state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR); - state->trcqctlr = etm4x_read32(csa, TRCQCTLR); + if (drvdata->q_filt) + state->trcqctlr = etm4x_read32(csa, TRCQCTLR); state->trcvictlr = etm4x_read32(csa, TRCVICTLR); state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR); @@ -1764,7 +1767,8 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR); etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR); 
etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR); - etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR); + if (drvdata->q_filt) + etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR); etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR); etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 32daf11ec856..6e8d4f0efab6 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -125,6 +125,8 @@ #define TRCRSR_TA BIT(12) +#define TRCIDR0_QFILT BIT(14) + /* * System instructions to access ETM registers. * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions @@ -867,6 +869,7 @@ struct etmv4_save_state { * @os_unlock: True if access to management registers is allowed. * @instrp0: Tracing of load and store instructions * as P0 elements is supported. + * @q_filt: Q element filtering support, if Q elements are supported. * @trcbb: Indicates if the trace unit supports branch broadcast tracing. * @trccond: If the trace unit supports conditional * instruction tracing. @@ -929,6 +932,7 @@ struct etmv4_drvdata { bool boot_enable; bool os_unlock; bool instrp0; + bool q_filt; bool trcbb; bool trccond; bool retstack; From 0ae4f32634d02104dcd1d7703c1f106f47722049 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:27:02 +0100 Subject: [PATCH 71/98] FROMGIT: coresight: etm4x: Fix access to resource selector registers Resource selector pair 0 is always implemented and reserved. We must not touch it, even during save/restore for CPU Idle. Rest of the driver is well behaved. Fix the offending ones. 
Reported-by: Yabin Cui Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Reviewed-by: Mike Leach Link: https://lore.kernel.org/r/20240412142702.2882478-5-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit d6fc00d0f640d6010b51054aa8b0fd191177dbc9 https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: I5f3385cb269969a299402fa258b30ab43e95805f Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 44c785c296ee..2608453620e8 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1660,7 +1660,8 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* Resource selector pair 0 is reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { @@ -1791,7 +1792,8 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* Resource selector pair 0 is reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { From 978f805a2d4b5a0bed9576750152af33f1b4fe45 Mon Sep 17 00:00:00 2001 From: seanwang1 Date: Thu, 25 Apr 2024 17:08:03 +0800 Subject: [PATCH 72/98] ANDROID: GKI: Export css_task_iter_start() Export css_task_iter_start() and css_task_iter_next() and css_task_iter_end() inorder to support task iteration in a cgroup in vendor modules. 
Bug: 336967294 Change-Id: Id93963ddd30ab02c7a4d5086f19d15310e4eda14 Signed-off-by: seanwang1 --- kernel/cgroup/cgroup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 532561757795..3de209d57c19 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4802,6 +4802,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, spin_unlock_irq(&css_set_lock); } +EXPORT_SYMBOL_GPL(css_task_iter_start); /** * css_task_iter_next - return the next task for the iterator @@ -4835,6 +4836,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) return it->cur_task; } +EXPORT_SYMBOL_GPL(css_task_iter_next); /** * css_task_iter_end - finish task iteration @@ -4857,6 +4859,7 @@ void css_task_iter_end(struct css_task_iter *it) if (it->cur_task) put_task_struct(it->cur_task); } +EXPORT_SYMBOL_GPL(css_task_iter_end); static void cgroup_procs_release(struct kernfs_open_file *of) { From 444a497469b1c30429daafdc3e4df7d52dc30ffa Mon Sep 17 00:00:00 2001 From: seanwang1 Date: Sun, 28 Apr 2024 15:18:48 +0800 Subject: [PATCH 73/98] ANDROID: GKI: Update lenovo symbol list 3 function symbols added 'void css_task_iter_end(struct css_task_iter*)' 'struct task_struct* css_task_iter_next(struct css_task_iter*)' 'void css_task_iter_start(struct cgroup_subsys_state*, unsigned int, struct css_task_iter*)' Bug: 336967294 Change-Id: I7258e06fe9f1e21d73481d47a5cc54bb95e40646 Signed-off-by: seanwang1 --- android/abi_gki_aarch64.stg | 138 +++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_lenovo | 3 + 2 files changed, 141 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 97fca3ab154c..5d9f00e5ac55 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -2498,6 +2498,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x02fa8d0a } +pointer_reference { + id: 0x0a3309af + kind: POINTER + pointee_type_id: 0x028cc020 +} 
pointer_reference { id: 0x0a4d20f4 kind: POINTER @@ -62346,12 +62351,24 @@ member { type_id: 0x4585663f offset: 1376 } +member { + id: 0x6e05f60a + name: "cset_head" + type_id: 0x3e6239e1 + offset: 192 +} member { id: 0xe75624b8 name: "cset_links" type_id: 0xd3c80119 offset: 4032 } +member { + id: 0x66e22acd + name: "cset_pos" + type_id: 0x3e6239e1 + offset: 128 +} member { id: 0xf69ec4bf name: "csets" @@ -63037,6 +63054,18 @@ member { type_id: 0x0d821a01 offset: 384 } +member { + id: 0x07e78da0 + name: "cur_cset" + type_id: 0x0d821a01 + offset: 512 +} +member { + id: 0xace31076 + name: "cur_dcset" + type_id: 0x0d821a01 + offset: 576 +} member { id: 0xc5936017 name: "cur_format" @@ -63137,12 +63166,24 @@ member { type_id: 0x31114896 offset: 320 } +member { + id: 0x47642a41 + name: "cur_task" + type_id: 0x1d19a9d5 + offset: 640 +} member { id: 0x47642b4f name: "cur_task" type_id: 0x1d19a9d5 offset: 448 } +member { + id: 0x2742c6d9 + name: "cur_tasks_head" + type_id: 0x3e6239e1 + offset: 448 +} member { id: 0x8b687155 name: "curchunk_hdrlen" @@ -102535,6 +102576,12 @@ member { type_id: 0x18bd6530 offset: 320 } +member { + id: 0x46869f17 + name: "iters_node" + type_id: 0xd3c80119 + offset: 704 +} member { id: 0xa331f04a name: "itree" @@ -166264,6 +166311,11 @@ member { type_id: 0x0a85fcb6 offset: 704 } +member { + id: 0x8c864b85 + name: "ss" + type_id: 0x0a85fcb6 +} member { id: 0x8c864f64 name: "ss" @@ -173254,6 +173306,12 @@ member { type_id: 0x49b889e7 offset: 896 } +member { + id: 0x83eb8339 + name: "task_pos" + type_id: 0x3e6239e1 + offset: 384 +} member { id: 0x95bea2ba name: "task_running" @@ -173821,6 +173879,18 @@ member { type_id: 0xc9082b19 offset: 32 } +member { + id: 0x0e555c39 + name: "tcset_head" + type_id: 0x3e6239e1 + offset: 320 +} +member { + id: 0x70b82471 + name: "tcset_pos" + type_id: 0x3e6239e1 + offset: 256 +} member { id: 0x3660523a name: "tctx_list" @@ -203585,6 +203655,27 @@ struct_union { member_id: 0x56a72c33 } } +struct_union { + id: 
0x028cc020 + kind: STRUCT + name: "css_task_iter" + definition { + bytesize: 112 + member_id: 0x8c864b85 + member_id: 0x2d2d05d9 + member_id: 0x66e22acd + member_id: 0x6e05f60a + member_id: 0x70b82471 + member_id: 0x0e555c39 + member_id: 0x83eb8339 + member_id: 0x2742c6d9 + member_id: 0x07e78da0 + member_id: 0xace31076 + member_id: 0x47642a41 + member_id: 0x46869f17 + member_id: 0x2d081688 + } +} struct_union { id: 0x209b5a60 kind: STRUCT @@ -264797,6 +264888,11 @@ function { return_type_id: 0x48b5725f parameter_id: 0x0a2e9ae5 } +function { + id: 0x121493f8 + return_type_id: 0x48b5725f + parameter_id: 0x0a3309af +} function { id: 0x12186f16 return_type_id: 0x48b5725f @@ -269831,6 +269927,13 @@ function { parameter_id: 0x2efd5036 parameter_id: 0x18bd6530 } +function { + id: 0x1b4978c4 + return_type_id: 0x48b5725f + parameter_id: 0x2b16c036 + parameter_id: 0x4585663f + parameter_id: 0x0a3309af +} function { id: 0x1b4c69f5 return_type_id: 0x48b5725f @@ -277489,6 +277592,11 @@ function { return_type_id: 0x19341e7e parameter_id: 0x2efe8065 } +function { + id: 0x79c16494 + return_type_id: 0x1d19a9d5 + parameter_id: 0x0a3309af +} function { id: 0x79e2d3b1 return_type_id: 0x18bd6530 @@ -319239,6 +319347,33 @@ elf_symbol { type_id: 0xc867c639 full_name: "css_next_descendant_pre" } +elf_symbol { + id: 0xcf7b4665 + name: "css_task_iter_end" + is_defined: true + symbol_type: FUNCTION + crc: 0x95e443ef + type_id: 0x121493f8 + full_name: "css_task_iter_end" +} +elf_symbol { + id: 0x4223a490 + name: "css_task_iter_next" + is_defined: true + symbol_type: FUNCTION + crc: 0x5929335f + type_id: 0x79c16494 + full_name: "css_task_iter_next" +} +elf_symbol { + id: 0x559ad4b8 + name: "css_task_iter_start" + is_defined: true + symbol_type: FUNCTION + crc: 0x53460a34 + type_id: 0x1b4978c4 + full_name: "css_task_iter_start" +} elf_symbol { id: 0x81bdacc5 name: "csum_ipv6_magic" @@ -365396,6 +365531,9 @@ interface { symbol_id: 0x9b2d60fa symbol_id: 0xd36400de symbol_id: 0x2c08983d + 
symbol_id: 0xcf7b4665 + symbol_id: 0x4223a490 + symbol_id: 0x559ad4b8 symbol_id: 0x81bdacc5 symbol_id: 0x554af0ba symbol_id: 0xbb54c900 diff --git a/android/abi_gki_aarch64_lenovo b/android/abi_gki_aarch64_lenovo index 7a8be12c77d4..1cc068b7f779 100644 --- a/android/abi_gki_aarch64_lenovo +++ b/android/abi_gki_aarch64_lenovo @@ -205,6 +205,9 @@ cpu_topology crc32_le css_next_child + css_task_iter_end + css_task_iter_next + css_task_iter_start csum_partial _ctype debugfs_attr_read From fb310d468a41c61f9dc9c0be165b7e021a5d2ca9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Apr 2024 21:05:13 +0200 Subject: [PATCH 74/98] UPSTREAM: netfilter: nft_set_pipapo: do not free live element [ Upstream commit 3cfc9ec039af60dbd8965ae085b2c2ccdcfbe1cc ] Pablo reports a crash with large batches of elements with a back-to-back add/remove pattern. Quoting Pablo: add_elem("00000000") timeout 100 ms ... add_elem("0000000X") timeout 100 ms del_elem("0000000X") <---------------- delete one that was just added ... add_elem("00005000") timeout 100 ms 1) nft_pipapo_remove() removes element 0000000X Then, KASAN shows a splat. Looking at the remove function there is a chance that we will drop a rule that maps to a non-deactivated element. Removal happens in two steps, first we do a lookup for key k and return the to-be-removed element and mark it as inactive in the next generation. Then, in a second step, the element gets removed from the set/map. The _remove function does not work correctly if we have more than one element that share the same key. This can happen if we insert an element into a set when the set already holds an element with same key, but the element mapping to the existing key has timed out or is not active in the next generation. In such case its possible that removal will unmap the wrong element. If this happens, we will leak the non-deactivated element, it becomes unreachable. 
The element that got deactivated (and will be freed later) will remain reachable in the set data structure, this can result in a crash when such an element is retrieved during lookup (stale pointer). Add a check that the fully matching key does in fact map to the element that we have marked as inactive in the deactivation step. If not, we need to continue searching. Add a bug/warn trap at the end of the function as well, the remove function must not ever be called with an invisible/unreachable/non-existent element. v2: avoid uneeded temporary variable (Stefano) Bug: 336735501 Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Pablo Neira Ayuso Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (cherry picked from commit ebf7c9746f073035ee26209e38c3a1170f7b349a) Signed-off-by: Lee Jones Change-Id: Ic9a48ac9ac0f9960fea9e066d9a0a9fb93f7b633 --- net/netfilter/nft_set_pipapo.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 050672ccfa7e..381c1871be27 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1975,6 +1975,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = rules_f0; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; + if (!pipapo_match_field(f, start, rules_fx, match_start, match_end)) break; @@ -1987,16 +1989,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); - } - if (i == m->field_count) { - priv->dirty = true; - pipapo_drop(m, rulemap); - return; + if (last && f->mt[rulemap[i].to].e == e) { + priv->dirty = true; + pipapo_drop(m, rulemap); + return; + } } first_rule += rules_f0; } + + WARN_ON_ONCE(1); /* elem_priv 
not found */ } /** From 24e6758060b8a5bf7366892080d968962a5420e2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Oct 2022 16:22:53 +0000 Subject: [PATCH 75/98] BACKPORT: rcu: Fix missing nocb gp wake on rcu_barrier() In preparation for RCU lazy changes, wake up the RCU nocb gp thread if needed after an entrain. This change prevents the RCU barrier callback from waiting in the queue for several seconds before the lazy callbacks in front of it are serviced. Reported-by: Joel Fernandes (Google) Signed-off-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit b8f7aca3f0e0e6223094ba2662bac90353674b04 https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git rcu/next) (Backport: Conflicts: kernel/rcu/tree.c Due to missing 'rcu: Rework rcu_barrier() and callback-migration logic' Chose not to backport that.) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: Ib55c5886764b74df22531eca35f076ef7acc08dd Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4062165 Reviewed-by: Vineeth Pillai (cherry picked from commit fc6e55ea65dca9cc52bda6081341f3fcc87f6ee7) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 11 +++++++++++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_nocb.h | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index be627fb32a91..a0989afc9980 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4003,12 +4003,21 @@ static void rcu_barrier_func(void *cpu_in) { uintptr_t cpu = (uintptr_t)cpu_in; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + bool wake_nocb = false; + bool was_alldone = false; rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); rdp->barrier_head.func = rcu_barrier_callback; debug_rcu_head_queue(&rdp->barrier_head); rcu_nocb_lock(rdp); + /* + * Flush bypass and wakeup rcuog if we add callbacks to an empty regular + * queue. This way we don't wait for bypass timer that can reach seconds + * if it's fully lazy. + */ + was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); } else { @@ -4017,6 +4026,8 @@ static void rcu_barrier_func(void *cpu_in) rcu_state.barrier_sequence); } rcu_nocb_unlock(rdp); + if (wake_nocb) + wake_nocb_gp(rdp, false); } /** diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 222a5a59f535..168b1b84b138 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -437,6 +437,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); static void rcu_init_one_nocb(struct rcu_node *rnp); +static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long j); static bool 
rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 8fdf44f8523f..1b74e65399e0 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1449,6 +1449,11 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) { } +static bool wake_nocb_gp(struct rcu_data *rdp, bool force) +{ + return false; +} + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long j) { From 276d33f21a0ebbe96332dac67a2cd9ba1958346d Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 17 Sep 2022 16:41:59 +0000 Subject: [PATCH 76/98] UPSTREAM: rcu: Fix late wakeup when flush of bypass cblist happens When the bypass cblist gets too big or its timeout has occurred, it is flushed into the main cblist. However, the bypass timer is still running and the behavior is that it would eventually expire and wake the GP thread. Since we are going to use the bypass cblist for lazy CBs, do the wakeup soon as the flush for "too big or too long" bypass list happens. Otherwise, long delays can happen for callbacks which get promoted from lazy to non-lazy. This is a good thing to do anyway (regardless of future lazy patches), since it makes the behavior consistent with behavior of other code paths where flushing into the ->cblist makes the GP kthread into a non-sleeping state quickly. [ Frederic Weisbecker: Changes to avoid unnecessary GP-thread wakeups plus comment changes. ] Reviewed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit b50606f35f4b73c8e4c6b9c64fe7ba72ea919134) Bug: 258241771 Bug: 222463781 Test: powerIdle lab tests. 
Change-Id: If8da96d7ba6ed90a2a70f7d56f7bb03af44fd649 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4065239 Reviewed-by: Vineeth Pillai (cherry picked from commit 75db04e1eed1756a4ee5fb87ef8dd494d19bf53f) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree_nocb.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 1b74e65399e0..869f28eaddcf 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -438,8 +438,9 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || ncbs >= qhimark) { rcu_nocb_lock(rdp); + *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { - *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); @@ -452,7 +453,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, rcu_advance_cbs_nowake(rdp->mynode, rdp); rdp->nocb_gp_adv_time = j; } - rcu_nocb_unlock_irqrestore(rdp, flags); + + // The flush succeeded and we moved CBs into the regular list. + // Don't wait for the wake up timer as it may be too far ahead. + // Wake up the GP thread now instead, if the cblist was empty. + __call_rcu_nocb_wake(rdp, *was_alldone, flags); + return true; // Callback already enqueued. } From e0297c38a54d51304c722405823a5e029ab6a091 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:54 +0000 Subject: [PATCH 77/98] BACKPORT: rcu: Make call_rcu() lazy to save power Implement timer-based RCU callback batching (also known as lazy callbacks). With this we save about 5-10% of power consumed due to RCU requests that happen when system is lightly loaded or idle. 
By default, all async callbacks (queued via call_rcu) are marked lazy. An alternate API call_rcu_hurry() is provided for the few users, for example synchronize_rcu(), that need the old behavior. The batch is flushed whenever a certain amount of time has passed, or the batch on a particular CPU grows too big. Also memory pressure will flush it in a future patch. To handle several corner cases automagically (such as rcu_barrier() and hotplug), we re-use bypass lists which were originally introduced to address lock contention, to handle lazy CBs as well. The bypass list length has the lazy CB length included in it. A separate lazy CB length counter is also introduced to keep track of the number of lazy CBs. [ paulmck: Fix formatting of inline call_rcu_lazy() definition. ] [ paulmck: Apply Zqiang feedback. ] [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] [ joelaf: Small changes for 5.15 backport. ] Suggested-by: Paul McKenney Acked-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Bug: 258241771 Bug: 222463781 Test: CQ (cherry picked from commit 3cb278e73be58bfb780ecd55129296d2f74c1fb7 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master) Change-Id: I557d5af2a5d317bd66e9ec55ed40822bb5c54390 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318045 Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes Tested-by: Joel Fernandes (cherry picked from commit b30e520b9da88a5de115ed5b2c1b2aa89de9e214) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- include/linux/rcupdate.h | 9 +++ kernel/rcu/Kconfig | 8 ++ kernel/rcu/rcu.h | 8 ++ kernel/rcu/tiny.c | 2 +- kernel/rcu/tree.c | 46 +++++++++-- kernel/rcu/tree.h | 11 ++- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_nocb.h | 159 +++++++++++++++++++++++++++++++-------- 8 files changed, 201 insertions(+), 44 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13bddb841ceb..3aad9ebfa7af 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -81,6 +81,15 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_RCU_LAZY +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); +#else +static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} +#endif + /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active __read_mostly; diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 066be3bb9d77..8e31e315a6f5 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -274,4 +274,12 @@ config TASKS_TRACE_RCU_READ_MB Say N here if you hate read-side memory barriers. Take the default if you are unsure. +config RCU_LAZY + bool "RCU callback lazy invocation functionality" + depends on RCU_NOCB_CPU + default n + help + To save power, batch RCU callbacks and flush after delay, memory + pressure, or callback list growing too big. 
+ endmenu # "RCU Subsystem" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 5510d2231c32..075c4e3ebab9 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -459,6 +459,14 @@ enum rcutorture_type { INVALID_RCU_FLAVOR }; +#if defined(CONFIG_RCU_LAZY) +unsigned long rcu_lazy_get_jiffies_till_flush(void); +void rcu_lazy_set_jiffies_till_flush(unsigned long j); +#else +static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } +static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } +#endif + #if defined(CONFIG_TREE_RCU) void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, unsigned long *gp_seq); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 340b3f8b090d..457684ad1627 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = { void rcu_barrier(void) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); } EXPORT_SYMBOL(rcu_barrier); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a0989afc9980..2c7138ac0b60 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2976,9 +2976,8 @@ static void check_cb_ovld(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); } -/* Helper function for call_rcu() and friends. */ static void -__call_rcu(struct rcu_head *head, rcu_callback_t func) +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) { static atomic_t doublefrees; unsigned long flags; @@ -3019,7 +3018,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) } check_cb_ovld(rdp); - if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) return; // Enqueued onto ->nocb_bypass, so just leave. // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. 
rcu_segcblist_enqueue(&rdp->cblist, head); @@ -3042,8 +3041,40 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) } } +#ifdef CONFIG_RCU_LAZY +/** + * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and + * flush all lazy callbacks (including the new one) to the main ->cblist while + * doing so. + * + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. + * + * Use this API instead of call_rcu() if you don't want the callback to be + * invoked after very long periods of time, which can happen on systems without + * memory pressure and on systems which are lightly loaded or mostly idle. + * This function will cause callbacks to be invoked sooner than later at the + * expense of extra power. Other than that, this function is identical to, and + * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory + * ordering and other functionality. + */ +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + return __call_rcu_common(head, func, false); +} +EXPORT_SYMBOL_GPL(call_rcu_hurry); +#endif + /** * call_rcu() - Queue an RCU callback for invocation after a grace period. + * By default the callbacks are 'lazy' and are kept hidden from the main + * ->cblist to prevent starting of grace periods too soon. + * If you desire grace periods to start very soon, use call_rcu_hurry(). + * * @head: structure to be used for queueing the RCU updates. 
* @func: actual callback function to be invoked after the grace period * @@ -3084,11 +3115,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - __call_rcu(head, func); + return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); } EXPORT_SYMBOL_GPL(call_rcu); - /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (HZ / 50) #define KFREE_N_BATCHES 2 @@ -3797,7 +3827,7 @@ void synchronize_rcu(void) if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); else - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); } EXPORT_SYMBOL_GPL(synchronize_rcu); @@ -4016,7 +4046,7 @@ static void rcu_barrier_func(void *cpu_in) * if it's fully lazy. */ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); @@ -4410,7 +4440,7 @@ void rcutree_migrate_callbacks(int cpu) my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false)); raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ /* Leverage recent GPs and set GP for new callbacks. */ needwake = rcu_advance_cbs(my_rnp, rdp) || diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 168b1b84b138..19809d07d92a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -258,14 +258,16 @@ struct rcu_data { short rcu_onl_gp_flags; /* ->gp_flags at last online. */ unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ + long lazy_len; /* Length of buffered lazy callbacks. 
*/ int cpu; }; /* Values for nocb_defer_wakeup field in struct rcu_data. */ #define RCU_NOCB_WAKE_NOT 0 #define RCU_NOCB_WAKE_BYPASS 1 -#define RCU_NOCB_WAKE 2 -#define RCU_NOCB_WAKE_FORCE 3 +#define RCU_NOCB_WAKE_LAZY 2 +#define RCU_NOCB_WAKE 3 +#define RCU_NOCB_WAKE_FORCE 4 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ @@ -439,9 +441,10 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); static void rcu_init_one_nocb(struct rcu_node *rnp); static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j); + unsigned long j, bool lazy); static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags); + bool *was_alldone, unsigned long flags, + bool lazy); static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, unsigned long flags); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f36b812b595b..444f3b47f0b0 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -922,7 +922,7 @@ void synchronize_rcu_expedited(void) /* If expedited grace periods are prohibited, fall back to normal. */ if (rcu_gp_is_normal()) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); return; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 869f28eaddcf..2c30f32df7d1 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -261,6 +261,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) return __wake_nocb_gp(rdp_gp, rdp, force, flags); } +/* + * LAZY_FLUSH_JIFFIES decides the maximum amount of time that + * can elapse before lazy callbacks are flushed. 
Lazy callbacks + * could be flushed much earlier for a number of other reasons + * however, LAZY_FLUSH_JIFFIES will ensure no lazy callbacks are + * left unsubmitted to RCU after those many jiffies. + */ +#define LAZY_FLUSH_JIFFIES (10 * HZ) +static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; + +#ifdef CONFIG_RCU_LAZY +// To be called only from test code. +void rcu_lazy_set_jiffies_till_flush(unsigned long jif) +{ + jiffies_till_flush = jif; +} +EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush); + +unsigned long rcu_lazy_get_jiffies_till_flush(void) +{ + return jiffies_till_flush; +} +EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush); +#endif + /* * Arrange to wake the GP kthread for this NOCB group at some future * time when it is safe to do so. @@ -274,10 +299,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); /* - * Bypass wakeup overrides previous deferments. In case - * of callback storm, no need to wake up too early. + * Bypass wakeup overrides previous deferments. In case of + * callback storms, no need to wake up too early. */ - if (waketype == RCU_NOCB_WAKE_BYPASS) { + if (waketype == RCU_NOCB_WAKE_LAZY && + rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { + mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush); + WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); + } else if (waketype == RCU_NOCB_WAKE_BYPASS) { mod_timer(&rdp_gp->nocb_timer, jiffies + 2); WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); } else { @@ -298,10 +327,13 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, * proves to be initially empty, just return false because the no-CB GP * kthread may need to be awakened in this case. * + * Return true if there was something to be flushed and it succeeded, otherwise + * false. + * * Note that this function always returns true if rhp is NULL. 
*/ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { struct rcu_cblist rcl; @@ -315,7 +347,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ if (rhp) rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + + /* + * If the new CB requested was a lazy one, queue it onto the main + * ->cblist so we can take advantage of a sooner grade period. + */ + if (lazy && rhp) { + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL); + rcu_cblist_enqueue(&rcl, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + } else { + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + } + rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); WRITE_ONCE(rdp->nocb_bypass_first, j); rcu_nocb_bypass_unlock(rdp); @@ -331,13 +376,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, * Note that this function always returns true if rhp is NULL. */ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { if (!rcu_rdp_is_offloaded(rdp)) return true; rcu_lockdep_assert_cblist_protected(rdp); rcu_nocb_bypass_lock(rdp); - return rcu_nocb_do_flush_bypass(rdp, rhp, j); + return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy); } /* @@ -350,7 +395,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) if (!rcu_rdp_is_offloaded(rdp) || !rcu_nocb_bypass_trylock(rdp)) return; - WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j)); + WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } /* @@ -372,12 +417,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) * there is only one CPU in operation. 
*/ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, + bool lazy) { unsigned long c; unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); lockdep_assert_irqs_disabled(); @@ -422,25 +469,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If there hasn't yet been all that many ->cblist enqueues // this jiffy, tell the caller to enqueue onto ->cblist. But flush // ->nocb_bypass first. - if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) { + // Lazy CBs throttle this back and do immediate bypass queuing. + if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j)); + + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); return false; // Caller must enqueue the callback. } // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. 
- if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || + if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || + (ncbs && bypass_is_lazy && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || ncbs >= qhimark) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); - if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { + if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); @@ -468,13 +519,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + + if (lazy) + WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); + if (!ncbs) { WRITE_ONCE(rdp->nocb_bypass_first, j); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ")); } rcu_nocb_bypass_unlock(rdp); smp_mb(); /* Order enqueue before wake. */ - if (ncbs) { + // A wake up of the grace period kthread or timer adjustment + // needs to be done only if: + // 1. Bypass list was fully empty before (this is the first + // bypass list entry), or: + // 2. Both of these conditions are met: + // a. The bypass list previously had only lazy CBs, and: + // b. The new CB is non-lazy. + if (ncbs && (!bypass_is_lazy || lazy)) { local_irq_restore(flags); } else { // No-CBs GP kthread might be indefinitely asleep, if so, wake. @@ -502,8 +564,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, unsigned long flags) __releases(rdp->nocb_lock) { + long bypass_len; unsigned long cur_gp_seq; unsigned long j; + long lazy_len; long len; struct task_struct *t; @@ -517,9 +581,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, } // Need to actually to a wakeup. 
len = rcu_segcblist_n_cbs(&rdp->cblist); + bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_len = READ_ONCE(rdp->lazy_len); if (was_alldone) { rdp->qlen_last_fqs_check = len; - if (!irqs_disabled_flags(flags)) { + // Only lazy CBs in bypass list + if (lazy_len && bypass_len == lazy_len) { + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazy")); + } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ rcu_nocb_unlock_irqrestore(rdp, flags); wake_nocb_gp(rdp, false); @@ -612,12 +683,12 @@ static inline bool nocb_gp_update_state_deoffloading(struct rcu_data *rdp, static void nocb_gp_wait(struct rcu_data *my_rdp) { bool bypass = false; - long bypass_ncbs; int __maybe_unused cpu = my_rdp->cpu; unsigned long cur_gp_seq; unsigned long flags; bool gotcbs = false; unsigned long j = jiffies; + bool lazy = false; bool needwait_gp = false; // This prevents actual uninitialized use. bool needwake; bool needwake_gp; @@ -634,9 +705,13 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp); for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) { bool needwake_state = false; + long bypass_ncbs; + bool flush_bypass = false; + long lazy_ncbs; if (!nocb_gp_enabled_cb(rdp)) continue; + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); rcu_nocb_lock_irqsave(rdp, flags); if (nocb_gp_update_state_deoffloading(rdp, &needwake_state)) { @@ -646,22 +721,37 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) continue; } bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); - if (bypass_ncbs && + lazy_ncbs = READ_ONCE(rdp->lazy_len); + + if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || + bypass_ncbs > 2 * qhimark)) { + flush_bypass = true; + } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || bypass_ncbs > 2 * qhimark)) { - // Bypass 
full or old, so flush it. - (void)rcu_nocb_try_flush_bypass(rdp, j); - bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + flush_bypass = true; } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); if (needwake_state) swake_up_one(&rdp->nocb_state_wq); continue; /* No callbacks here, try next. */ } + + if (flush_bypass) { + // Bypass full or old, so flush it. + (void)rcu_nocb_try_flush_bypass(rdp, j); + bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_ncbs = READ_ONCE(rdp->lazy_len); + } + if (bypass_ncbs) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, - TPS("Bypass")); - bypass = true; + bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass")); + if (bypass_ncbs == lazy_ncbs) + lazy = true; + else + bypass = true; } rnp = rdp->mynode; @@ -711,12 +801,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) my_rdp->nocb_gp_gp = needwait_gp; my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; - if (bypass && !rcu_nocb_poll) { - // At least one child with non-empty ->nocb_bypass, so set - // timer in order to avoid stranding its callbacks. - wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, - TPS("WakeBypassIsDeferred")); + // At least one child with non-empty ->nocb_bypass, so set + // timer in order to avoid stranding its callbacks. + if (!rcu_nocb_poll) { + // If bypass list only has lazy CBs. Add a deferred lazy wake up. + if (lazy && !bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazyIsDeferred")); + // Otherwise add a deferred bypass wake up. + } else if (bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, + TPS("WakeBypassIsDeferred")); + } } + if (rcu_nocb_poll) { /* Polling, so trace if first poll in the series. */ if (gotcbs) @@ -995,7 +1093,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) * return false, which means that future calls to rcu_nocb_try_bypass() * will refuse to put anything into the bypass. 
*/ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); ret = rdp_offload_toggle(rdp, false, flags); swait_event_exclusive(rdp->nocb_state_wq, !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB | @@ -1177,6 +1275,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) raw_spin_lock_init(&rdp->nocb_gp_lock); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); rcu_cblist_init(&rdp->nocb_bypass); + WRITE_ONCE(rdp->lazy_len, 0); } /* @@ -1461,13 +1560,13 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) } static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { return true; } static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, bool lazy) { return false; } From f4abe7bb5f0077047cf2e650a841c837c3bfe7b6 Mon Sep 17 00:00:00 2001 From: Vineeth Pillai Date: Sun, 16 Oct 2022 16:22:56 +0000 Subject: [PATCH 78/98] BACKPORT: rcu: Shrinker for lazy rcu The shrinker is used to speed up the free'ing of memory potentially held by RCU lazy callbacks. RCU kernel module test cases show this to be effective. Test is introduced in a later patch. [Joel: register_shrinker() argument list change.] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I6a73a9dae79ff35feca37abe2663e55a0f46dda8 Signed-off-by: Vineeth Pillai Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit c945b4da7a448a9a56becc5a8745d942b2b83d3c) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318046 Tested-by: Joel Fernandes Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes (cherry picked from commit 2cf50ca2e7c3bc08f5182fc517a89a65e8dca7e3) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree_nocb.h | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 2c30f32df7d1..a4daed924191 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1215,6 +1215,55 @@ int rcu_nocb_cpu_offload(int cpu) } EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); +static unsigned long +lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int cpu; + unsigned long count = 0; + + /* Snapshot count of all CPUs */ + for_each_possible_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + count += READ_ONCE(rdp->lazy_len); + } + + return count ? count : SHRINK_EMPTY; +} + +static unsigned long +lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + int cpu; + unsigned long flags; + unsigned long count = 0; + + /* Snapshot count of all CPUs */ + for_each_possible_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + int _count = READ_ONCE(rdp->lazy_len); + + if (_count == 0) + continue; + rcu_nocb_lock_irqsave(rdp, flags); + WRITE_ONCE(rdp->lazy_len, 0); + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp(rdp, false); + sc->nr_to_scan -= _count; + count += _count; + if (sc->nr_to_scan <= 0) + break; + } + return count ? 
count : SHRINK_STOP; +} + +static struct shrinker lazy_rcu_shrinker = { + .count_objects = lazy_rcu_shrink_count, + .scan_objects = lazy_rcu_shrink_scan, + .batch = 0, + .seeks = DEFAULT_SEEKS, +}; + void __init rcu_init_nohz(void) { int cpu; @@ -1240,6 +1289,9 @@ void __init rcu_init_nohz(void) cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); #endif /* #if defined(CONFIG_NO_HZ_FULL) */ + if (register_shrinker(&lazy_rcu_shrinker)) + pr_err("Failed to register lazy_rcu shrinker!\n"); + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n"); cpumask_and(rcu_nocb_mask, cpu_possible_mask, From 222a4cd66cd4483e3d28f0efe1284b6942b02e59 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:55 +0000 Subject: [PATCH 79/98] UPSTREAM: rcu: Refactor code a bit in rcu_nocb_do_flush_bypass() This consolidates the code a bit and makes it cleaner. Functionally it is the same. Bug: 258241771 Bug: 222463781 Test: CQ Reported-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney (cherry picked from commit 3d222a0c0cfef85bad2c9cff5d541836cb81cfbd) Change-Id: I8422c7138edd6a476fc46374beefdf46dd76b8b0 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318047 Tested-by: Joel Fernandes Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes (cherry picked from commit 58cb433d445d2416ba26645e8df63d86afa15f8c) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree_nocb.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index a4daed924191..b210af04c08a 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -332,10 +332,11 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, * * Note that this function always returns true if rhp is NULL. */ -static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in, unsigned long j, bool lazy) { struct rcu_cblist rcl; + struct rcu_head *rhp = rhp_in; WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); rcu_lockdep_assert_cblist_protected(rdp); @@ -350,16 +351,16 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, /* * If the new CB requested was a lazy one, queue it onto the main - * ->cblist so we can take advantage of a sooner grade period. + * ->cblist so that we can take advantage of the grace-period that will + * happen regardless. But queue it onto the bypass list first so that + * the lazy CB is ordered with the existing CBs in the bypass list. 
*/ if (lazy && rhp) { - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL); - rcu_cblist_enqueue(&rcl, rhp); - WRITE_ONCE(rdp->lazy_len, 0); - } else { - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); - WRITE_ONCE(rdp->lazy_len, 0); + rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + rhp = NULL; } + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + WRITE_ONCE(rdp->lazy_len, 0); rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); WRITE_ONCE(rdp->nocb_bypass_first, j); From a4cc1aa22dd3c7146a2f67f089b0ee76ebbb9696 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:59 +0000 Subject: [PATCH 80/98] UPSTREAM: rcu/sync: Use call_rcu_hurry() instead of call_rcu call_rcu() changes to save power will slow down rcu sync. Use the call_rcu_hurry() API instead which reverts to the old behavior. [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I5123ba52f47676305dbcfa1233bf3b41f140766c Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit 7651d6b25086656eacfdd8356bfe3a21c0c2d79d) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318048 Reviewed-by: Sean Paul Commit-Queue: Joel Fernandes Reviewed-by: Vineeth Pillai Tested-by: Joel Fernandes (cherry picked from commit 183fce4e1bfbbae1266ec90c6bb871b51d7af81c) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 33d896d85902..c6bae4050d53 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,7 +44,7 @@ static void rcu_sync_func(struct rcu_head *rhp); static void rcu_sync_call(struct rcu_sync *rsp) { - call_rcu(&rsp->cb_head, rcu_sync_func); + call_rcu_hurry(&rsp->cb_head, rcu_sync_func); } /** From ff22b562f0a6573bfb42acaacf777d68a76bde79 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:58 +0000 Subject: [PATCH 81/98] UPSTREAM: percpu-refcount: Use call_rcu_hurry() for atomic switch Earlier commits in this series allow battery-powered systems to build their kernels with the default-disabled CONFIG_RCU_LAZY=y Kconfig option. This Kconfig option causes call_rcu() to delay its callbacks in order to batch callbacks. This means that a given RCU grace period covers more callbacks, thus reducing the number of grace periods, in turn reducing the amount of energy consumed, which increases battery lifetime which can be a very good thing. This is not a subtle effect: In some important use cases, the battery lifetime is increased by more than 10%. This CONFIG_RCU_LAZY=y option is available only for CPUs that offload callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y. Delaying callbacks is normally not a problem because most callbacks do nothing but free memory. If the system is short on memory, a shrinker will kick all currently queued lazy callbacks out of their laziness, thus freeing their memory in short order. Similarly, the rcu_barrier() function, which blocks until all currently queued callbacks are invoked, will also kick lazy callbacks, thus enabling rcu_barrier() to complete in a timely manner. 
However, there are some cases where laziness is not a good option. For example, synchronize_rcu() invokes call_rcu(), and blocks until the newly queued callback is invoked. It would not be a good for synchronize_rcu() to block for ten seconds, even on an idle system. Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a given CPU kicks any lazy callbacks that might be already queued on that CPU. After all, if there is going to be a grace period, all callbacks might as well get full benefit from it. Yes, this could be done the other way around by creating a call_rcu_lazy(), but earlier experience with this approach and feedback at the 2022 Linux Plumbers Conference shifted the approach to call_rcu() being lazy with call_rcu_hurry() for the few places where laziness is inappropriate. And another call_rcu() instance that cannot be lazy is the one on the percpu refcounter's "per-CPU to atomic switch" code path, which uses RCU when switching to atomic mode. The enqueued callback wakes up waiters waiting in the percpu_ref_switch_waitq. Allowing this callback to be lazy would result in unacceptable slowdowns for users of per-CPU refcounts, such as blk_pre_runtime_suspend(). Therefore, make __percpu_ref_switch_to_atomic() use call_rcu_hurry() in order to revert to the old behavior. [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: Icc325f69d0df1a37b6f1de02a284e1fabf20e366 Signed-off-by: Joel Fernandes (Google) Acked-by: Tejun Heo Signed-off-by: Paul E. 
McKenney Cc: Dennis Zhou Cc: Christoph Lameter Cc: (cherry picked from commit 343a72e5e37d380b70534fae3acd7e5e39adb769) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318049 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul Tested-by: Joel Fernandes Commit-Queue: Joel Fernandes (cherry picked from commit dfd536f499642cd18679cc64c79a8fb275137f45) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- lib/percpu-refcount.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index e5c5315da274..668f6aa6a75d 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu_hurry(&ref->data->rcu, + percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) From f12c162eac7c951a5548439fda98aeac53f1bf33 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 18 Nov 2022 19:19:08 +0000 Subject: [PATCH 82/98] UPSTREAM: net: Use call_rcu_hurry() for dst_release() In a networking test on ChromeOS, kernels built with the new CONFIG_RCU_LAZY=y Kconfig option fail a networking test in the teardown phase. This failure may be reproduced as follows: ip netns del The CONFIG_RCU_LAZY=y Kconfig option was introduced by earlier commits in this series for the benefit of certain battery-powered systems. This Kconfig option causes call_rcu() to delay its callbacks in order to batch them. This means that a given RCU grace period covers more callbacks, thus reducing the number of grace periods, in turn reducing the amount of energy consumed, which increases battery lifetime which can be a very good thing. 
This is not a subtle effect: In some important use cases, the battery lifetime is increased by more than 10%. This CONFIG_RCU_LAZY=y option is available only for CPUs that offload callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y. Delaying callbacks is normally not a problem because most callbacks do nothing but free memory. If the system is short on memory, a shrinker will kick all currently queued lazy callbacks out of their laziness, thus freeing their memory in short order. Similarly, the rcu_barrier() function, which blocks until all currently queued callbacks are invoked, will also kick lazy callbacks, thus enabling rcu_barrier() to complete in a timely manner. However, there are some cases where laziness is not a good option. For example, synchronize_rcu() invokes call_rcu(), and blocks until the newly queued callback is invoked. It would not be a good for synchronize_rcu() to block for ten seconds, even on an idle system. Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a given CPU kicks any lazy callbacks that might be already queued on that CPU. After all, if there is going to be a grace period, all callbacks might as well get full benefit from it. Yes, this could be done the other way around by creating a call_rcu_lazy(), but earlier experience with this approach and feedback at the 2022 Linux Plumbers Conference shifted the approach to call_rcu() being lazy with call_rcu_hurry() for the few places where laziness is inappropriate. Returning to the test failure, use of ftrace showed that this failure cause caused by the aadded delays due to this new lazy behavior of call_rcu() in kernels built with CONFIG_RCU_LAZY=y. Therefore, make dst_release() use call_rcu_hurry() in order to revert to the old test-failure-free behavior. 
[ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: Ifd64083bd210a9dfe94c179152f27d310c179507 Signed-off-by: Joel Fernandes (Google) Cc: David Ahern Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Reviewed-by: Eric Dumazet Signed-off-by: Paul E. McKenney (cherry picked from commit 483c26ff63f42e8898ed43aca0b9953bc91f0cd4) Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318050 Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai (cherry picked from commit e0886387489fed8a60e7e0f107b95fb9c0241930) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index 497ef9b3fc6a..a64acdb69f56 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); if (!newrefcnt) - call_rcu(&dst->rcu_head, dst_destroy_rcu); + call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); From 856859371956b4d39ad7affe286f516d8253e736 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 12 Jan 2023 00:52:22 +0000 Subject: [PATCH 83/98] UPSTREAM: rcu: Track laziness during boot and suspend Boot and suspend/resume should not be slowed down in kernels built with CONFIG_RCU_LAZY=y. In particular, suspend can sometimes fail in such kernels. This commit therefore adds rcu_async_hurry(), rcu_async_relax(), and rcu_async_should_hurry() functions that track whether or not either a boot or a suspend/resume operation is in progress. This will enable a later commit to refrain from laziness during those times. 
Export rcu_async_should_hurry(), rcu_async_hurry(), and rcu_async_relax() for later use by rcutorture. [ paulmck: Apply feedback from Steve Rostedt. ] Bug: 258241771 Bug: 222463781 Test: CQ Fixes: 3cb278e73be5 ("rcu: Make call_rcu() lazy to save power") Change-Id: Ieb2f2d484a33cfbd71f71c8e3dbcfc05cd7efe8c Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit 6efdda8bec2900ce5166ee4ff4b1844b47b529cd) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318051 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul Tested-by: Joel Fernandes Commit-Queue: Joel Fernandes (cherry picked from commit 8bc7efc64c84da753f2174a7071c8f1a7823d2bb) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/rcu.h | 6 ++++++ kernel/rcu/tree.c | 2 ++ kernel/rcu/update.c | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 075c4e3ebab9..fd19addb1d27 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -428,14 +428,20 @@ do { \ /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ static inline bool rcu_gp_is_normal(void) { return true; } static inline bool rcu_gp_is_expedited(void) { return false; } +static inline bool rcu_async_should_hurry(void) { return false; } static inline void rcu_expedite_gp(void) { } static inline void rcu_unexpedite_gp(void) { } +static inline void rcu_async_hurry(void) { } +static inline void rcu_async_relax(void) { } static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } #else /* #ifdef CONFIG_TINY_RCU */ bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ +bool rcu_async_should_hurry(void); /* Internal RCU use. 
*/ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); +void rcu_async_hurry(void); +void rcu_async_relax(void); void rcupdate_announce_bootup_oddness(void); #ifdef CONFIG_TASKS_RCU_GENERIC void show_rcu_tasks_gp_kthreads(void); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2c7138ac0b60..f033bffd47e7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4478,11 +4478,13 @@ static int rcu_pm_notify(struct notifier_block *self, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + rcu_async_hurry(); rcu_expedite_gp(); break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: rcu_unexpedite_gp(); + rcu_async_relax(); break; default: break; diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c21b38cc25e9..a90458c7b4f2 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -144,8 +144,45 @@ bool rcu_gp_is_normal(void) } EXPORT_SYMBOL_GPL(rcu_gp_is_normal); -static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); +static atomic_t rcu_async_hurry_nesting = ATOMIC_INIT(1); +/* + * Should call_rcu() callbacks be processed with urgency or are + * they OK being executed with arbitrary delays? + */ +bool rcu_async_should_hurry(void) +{ + return !IS_ENABLED(CONFIG_RCU_LAZY) || + atomic_read(&rcu_async_hurry_nesting); +} +EXPORT_SYMBOL_GPL(rcu_async_should_hurry); +/** + * rcu_async_hurry - Make future async RCU callbacks not lazy. + * + * After a call to this function, future calls to call_rcu() + * will be processed in a timely fashion. + */ +void rcu_async_hurry(void) +{ + if (IS_ENABLED(CONFIG_RCU_LAZY)) + atomic_inc(&rcu_async_hurry_nesting); +} +EXPORT_SYMBOL_GPL(rcu_async_hurry); + +/** + * rcu_async_relax - Make future async RCU callbacks lazy. + * + * After a call to this function, future calls to call_rcu() + * will be processed in a lazy fashion. 
+ */ +void rcu_async_relax(void) +{ + if (IS_ENABLED(CONFIG_RCU_LAZY)) + atomic_dec(&rcu_async_hurry_nesting); +} +EXPORT_SYMBOL_GPL(rcu_async_relax); + +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); /* * Should normal grace-period primitives be expedited? Intended for * use within RCU. Note that this function takes the rcu_expedited @@ -195,6 +232,7 @@ static bool rcu_boot_ended __read_mostly; void rcu_end_inkernel_boot(void) { rcu_unexpedite_gp(); + rcu_async_relax(); if (rcu_normal_after_boot) WRITE_ONCE(rcu_normal, 1); rcu_boot_ended = true; From 706e751b3331d85ba38dbe5654961fa0eb155715 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 12 Jan 2023 00:52:23 +0000 Subject: [PATCH 84/98] UPSTREAM: rcu: Disable laziness if lazy-tracking says so During suspend, we see failures to suspend 1 in 300-500 suspends. Looking closer, it appears that asynchronous RCU callbacks are being queued as lazy even though synchronous callbacks are expedited. These delays appear to not be very welcome by the suspend/resume code as evidenced by these occasional suspend failures. This commit modifies call_rcu() to check if rcu_async_should_hurry(), which will return true if we are in suspend or in-kernel boot. [ paulmck: Alphabetize local variables. ] Ignoring the lazy hint makes the 3000 suspend/resume cycles pass reliably on a 12th gen 12-core Intel CPU, and there is some evidence that it also slightly speeds up boot performance. Bug: 258241771 Bug: 222463781 Test: CQ Fixes: 3cb278e73be5 ("rcu: Make call_rcu() lazy to save power") Change-Id: I4cfe6f43de8bae9a6c034831c79d9773199d6d29 Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney (cherry picked from commit cf7066b97e27b2319af1ae2ef6889c4a1704312d) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318052 Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai Tested-by: Joel Fernandes Commit-Queue: Joel Fernandes (cherry picked from commit e59686da91b689d3771a09f3eae37db5f40d3f75) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f033bffd47e7..1f7f05aa11a5 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2977,10 +2977,11 @@ static void check_cb_ovld(struct rcu_data *rdp) } static void -__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) { static atomic_t doublefrees; unsigned long flags; + bool lazy; struct rcu_data *rdp; bool was_alldone; @@ -3005,6 +3006,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) local_irq_save(flags); kasan_record_aux_stack_noalloc(head); rdp = this_cpu_ptr(&rcu_data); + lazy = lazy_in && !rcu_async_should_hurry(); /* Add the callback to our list. */ if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) { From 930bdc0924849b9e0d7f660e440dff182ab04259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Nov 2022 19:19:09 +0000 Subject: [PATCH 85/98] UPSTREAM: net: devinet: Reduce refcount before grace period Currently, the inetdev_destroy() function waits for an RCU grace period before decrementing the refcount and freeing memory. This causes a delay with a new RCU configuration that tries to save power, which results in the network interface disappearing later than expected. The resulting delay causes test failures on ChromeOS. Refactor the code such that the refcount is freed before the grace period and memory is freed after. 
With this a ChromeOS network test passes that does 'ip netns del' and polls for an interface disappearing, now passes. Bug: 258241771 Bug: 222463781 Test: CQ Reported-by: Joel Fernandes (Google) Change-Id: I98b13c5a8fb9696c1111219d774cf91c8b14b4c5 Signed-off-by: Eric Dumazet Signed-off-by: Joel Fernandes (Google) Cc: David Ahern Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Signed-off-by: Paul E. McKenney (cherry picked from commit 9d40c84cf5bcb5b1d124921ded2056d76be7640d) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318054 Tested-by: Joel Fernandes Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes Reviewed-by: Sean Paul (cherry picked from commit 3c0f4bb182d6b0be5424947b53019e92bea8b38c) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- net/ipv4/devinet.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c511751c2f41..b95ccab89acd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -231,13 +231,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa) call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } +static void in_dev_free_rcu(struct rcu_head *head) +{ + struct in_device *idev = container_of(head, struct in_device, rcu_head); + + kfree(rcu_dereference_protected(idev->mc_hash, 1)); + kfree(idev); +} + void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); - kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? 
dev->name : "NIL"); #endif @@ -245,7 +252,7 @@ void in_dev_finish_destroy(struct in_device *idev) if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else - kfree(idev); + call_rcu(&idev->rcu_head, in_dev_free_rcu); } EXPORT_SYMBOL(in_dev_finish_destroy); @@ -295,12 +302,6 @@ out_kfree: goto out; } -static void in_dev_rcu_put(struct rcu_head *head) -{ - struct in_device *idev = container_of(head, struct in_device, rcu_head); - in_dev_put(idev); -} - static void inetdev_destroy(struct in_device *in_dev) { struct net_device *dev; @@ -325,7 +326,7 @@ static void inetdev_destroy(struct in_device *in_dev) neigh_parms_release(&arp_tbl, in_dev->arp_parms); arp_ifdown(dev); - call_rcu(&in_dev->rcu_head, in_dev_rcu_put); + in_dev_put(in_dev); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) From a4124a21b121fef29df694a8771239a76bb87166 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:23:04 +0000 Subject: [PATCH 86/98] ANDROID: rxrpc: Use call_rcu_hurry() instead of call_rcu() call_rcu() changes to save power may cause slowness. Use the call_rcu_hurry() API instead which reverts to the old behavior. We find this via inspection that the RCU callback does a wakeup of a thread. This usually indicates that something is waiting on it. To be safe, let us use call_rcu_hurry() here instead. [ joel: Upstream is rewriting this code, so I am merging this as a CHROMIUM patch. There is no harm in including it. 
Link: https://lore.kernel.org/rcu/658624.1669849522@warthog.procyon.org.uk/#t ] Bug: 258241771 Bug: 222463781 Test: CQ Signed-off-by: Joel Fernandes (Google) Change-Id: Iaadfe2f9db189489915828c6f2f74522f4b90ea3 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/3965078 Reviewed-by: Ross Zwisler Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318055 Reviewed-by: Vineeth Pillai (cherry picked from commit 1f98f32393f83d14bc290fef06d5b3132bee23e0) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- net/rxrpc/conn_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 22089e37e97f..9c5fae9ca106 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -253,7 +253,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) * must carry a ref on the connection to prevent us getting here whilst * it is queued or running. */ - call_rcu(&conn->rcu, rxrpc_destroy_connection); + call_rcu_hurry(&conn->rcu, rxrpc_destroy_connection); } /* From 84828604c7299ecf420eed959cba81da19fa7fe1 Mon Sep 17 00:00:00 2001 From: Uladzislau Rezki Date: Sun, 16 Oct 2022 16:23:02 +0000 Subject: [PATCH 87/98] UPSTREAM: scsi/scsi_error: Use call_rcu_hurry() instead of call_rcu() Earlier commits in this series allow battery-powered systems to build their kernels with the default-disabled CONFIG_RCU_LAZY=y Kconfig option. This Kconfig option causes call_rcu() to delay its callbacks in order to batch them. This means that a given RCU grace period covers more callbacks, thus reducing the number of grace periods, in turn reducing the amount of energy consumed, which increases battery lifetime which can be a very good thing. This is not a subtle effect: In some important use cases, the battery lifetime is increased by more than 10%. 
This CONFIG_RCU_LAZY=y option is available only for CPUs that offload callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y. Delaying callbacks is normally not a problem because most callbacks do nothing but free memory. If the system is short on memory, a shrinker will kick all currently queued lazy callbacks out of their laziness, thus freeing their memory in short order. Similarly, the rcu_barrier() function, which blocks until all currently queued callbacks are invoked, will also kick lazy callbacks, thus enabling rcu_barrier() to complete in a timely manner. However, there are some cases where laziness is not a good option. For example, synchronize_rcu() invokes call_rcu(), and blocks until the newly queued callback is invoked. It would not be a good for synchronize_rcu() to block for ten seconds, even on an idle system. Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a given CPU kicks any lazy callbacks that might be already queued on that CPU. After all, if there is going to be a grace period, all callbacks might as well get full benefit from it. Yes, this could be done the other way around by creating a call_rcu_lazy(), but earlier experience with this approach and feedback at the 2022 Linux Plumbers Conference shifted the approach to call_rcu() being lazy with call_rcu_hurry() for the few places where laziness is inappropriate. And another call_rcu() instance that cannot be lazy is the one in the scsi_eh_scmd_add() function. Leaving this instance lazy results in unacceptably slow boot times. Therefore, make scsi_eh_scmd_add() use call_rcu_hurry() in order to revert to the old behavior. [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. 
] Bug: 258241771 Bug: 222463781 Test: CQ Tested-by: Joel Fernandes (Google) Change-Id: I95bba865e582b0a12b1c09ba1f0bd4f897401c07 Signed-off-by: Uladzislau Rezki Signed-off-by: Joel Fernandes (Google) Cc: "James E.J. Bottomley" Cc: Reviewed-by: Bart Van Assche Acked-by: Martin K. Petersen Signed-off-by: Paul E. McKenney (cherry picked from commit 54d87b0a0c19bc3f740e4cd4b87ba14ce2e4ea73) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318056 Commit-Queue: Joel Fernandes Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai Tested-by: Joel Fernandes (cherry picked from commit 5578f9ac27d25e3e57a5b9c4cf0346cfc5162994) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- drivers/scsi/scsi_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 8ab40c36bb88..ff1060fe44cd 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -314,7 +314,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) * Ensure that all tasks observe the host state change before the * host_failed change. */ - call_rcu(&scmd->rcu, scsi_eh_inc_host_failed); + call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed); } /** From 5b47d8411d6cd0de35216a9edadedf4d12387715 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 2 Jun 2022 10:06:43 +0200 Subject: [PATCH 88/98] UPSTREAM: rcu/kvfree: Remove useless monitor_todo flag monitor_todo is not needed as the work struct already tracks if work is pending. Just use that to know if work is pending using schedule_delayed_work() helper. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. 
McKenney Reviewed-by: Neeraj Upadhyay (cherry picked from commit 82d26c36cc68e781400eb4e541f943008208f2d6) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I4c13f89da735a628a5030ab55a13e338b97da4b8 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4332176 Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai (cherry picked from commit bb867be28d6a70b36ff1d6563f794c489072ab7e) [Minor conflict with 71cf9c983515549999229ba240e61fa20b471dae where it added a new function in the same location. Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1f7f05aa11a5..78deb6a7844f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3168,7 +3168,6 @@ struct kfree_rcu_cpu_work { * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period * @lock: Synchronize access to this structure * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES - * @monitor_todo: Tracks whether a @monitor_work delayed work is pending * @initialized: The @rcu_work fields have been initialized * @count: Number of objects for which GP not started * @bkvcache: @@ -3193,7 +3192,6 @@ struct kfree_rcu_cpu { struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES]; raw_spinlock_t lock; struct delayed_work monitor_work; - bool monitor_todo; bool initialized; int count; @@ -3452,9 +3450,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // of the channels that is still busy we should rearm the // work to repeat an attempt. Because previous batches are // still in progress. 
- if (!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) - krcp->monitor_todo = false; - else + if (need_offload_krc(krcp)) schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); raw_spin_unlock_irqrestore(&krcp->lock, flags); @@ -3651,11 +3647,8 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) kmemleak_ignore(ptr); // Set timer to drain after KFREE_DRAIN_JIFFIES. - if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && - !krcp->monitor_todo) { - krcp->monitor_todo = true; + if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); - } unlock_return: krc_this_cpu_unlock(krcp, flags); @@ -3730,14 +3723,8 @@ void __init kfree_rcu_scheduler_running(void) struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); raw_spin_lock_irqsave(&krcp->lock, flags); - if ((!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) || - krcp->monitor_todo) { - raw_spin_unlock_irqrestore(&krcp->lock, flags); - continue; - } - krcp->monitor_todo = true; - schedule_delayed_work_on(cpu, &krcp->monitor_work, - KFREE_DRAIN_JIFFIES); + if (need_offload_krc(krcp)) + schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES); raw_spin_unlock_irqrestore(&krcp->lock, flags); } } From 88587c18386742c9dfe51e2e29065430bce3b26d Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Thu, 30 Jun 2022 18:33:35 +0200 Subject: [PATCH 89/98] UPSTREAM: rcu/kvfree: Update KFREE_DRAIN_JIFFIES interval Currently the monitor work is scheduled with a fixed interval of HZ/20, which is roughly 50 milliseconds. The drawback of this approach is low utilization of the 512 page slots in scenarios with infrequent kvfree_rcu() calls. For example on an Android system: kworker/3:3-507 [003] .... 470.286305: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d0f0dde5 nr_records=6 kworker/6:1-76 [006] .... 
470.416613: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000ea0d6556 nr_records=1 kworker/6:1-76 [006] .... 470.416625: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000003e025849 nr_records=9 kworker/3:3-507 [003] .... 471.390000: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000815a8713 nr_records=48 kworker/1:1-73 [001] .... 471.725785: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000fda9bf20 nr_records=3 kworker/1:1-73 [001] .... 471.725833: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000a425b67b nr_records=76 kworker/0:4-1411 [000] .... 472.085673: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000007996be9d nr_records=1 kworker/0:4-1411 [000] .... 472.085728: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d0f0dde5 nr_records=5 kworker/6:1-76 [006] .... 472.260340: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000065630ee4 nr_records=102 In many cases, out of 512 slots, fewer than 10 were actually used. In order to improve batching and make utilization more efficient this commit sets a drain interval to a fixed 5-seconds interval. Floods are detected when a page fills quickly, and in that case, the reclaim work is re-scheduled for the next scheduling-clock tick (jiffy). After this change: kworker/7:1-371 [007] .... 5630.725708: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000005ab0ffb3 nr_records=121 kworker/7:1-371 [007] .... 5630.989702: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000060c84761 nr_records=47 kworker/7:1-371 [007] .... 5630.989714: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000000babf308 nr_records=510 kworker/7:1-371 [007] .... 5631.553790: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000bb7bd0ef nr_records=169 kworker/7:1-371 [007] .... 5631.553808: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000044c78753 nr_records=510 kworker/5:6-9428 [005] .... 
5631.746102: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d98519aa nr_records=123 kworker/4:7-9434 [004] .... 5632.001758: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000526c9d44 nr_records=322 kworker/4:7-9434 [004] .... 5632.002073: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000002c6a8afa nr_records=185 kworker/7:1-371 [007] .... 5632.277515: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000007f4a962f nr_records=510 Here, in all but one of the cases, more than one hundred slots were used, representing an order-of-magnitude improvement. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney (cherry picked from commit 51824b780b719c53113dc39e027fbf670dc66028) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I4635ba0dbece4e029d5271ef3950b8eaa1ae5e81 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4332177 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul (cherry picked from commit b1bf359877e084383be107bf0008d58d0a6b15e3) [Conflict due to 71cf9c983515549999229ba240e61fa20b471dae adding a new function in the same location. Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 78deb6a7844f..ed2dce3f0176 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3122,7 +3122,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) EXPORT_SYMBOL_GPL(call_rcu); /* Maximum number of jiffies to wait before draining a batch. 
*/ -#define KFREE_DRAIN_JIFFIES (HZ / 50) +#define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 #define FREE_N_CHANNELS 2 @@ -3395,6 +3395,21 @@ need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp) return !!krwp->head_free; } +static void +schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) +{ + long delay, delay_left; + + delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES; + if (delayed_work_pending(&krcp->monitor_work)) { + delay_left = krcp->monitor_work.timer.expires - jiffies; + if (delay < delay_left) + mod_delayed_work(system_wq, &krcp->monitor_work, delay); + return; + } + queue_delayed_work(system_wq, &krcp->monitor_work, delay); +} + /* * This function is invoked after the KFREE_DRAIN_JIFFIES timeout. */ @@ -3451,7 +3466,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // work to repeat an attempt. Because previous batches are // still in progress. if (need_offload_krc(krcp)) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } @@ -3648,7 +3663,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) // Set timer to drain after KFREE_DRAIN_JIFFIES. 
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); unlock_return: krc_this_cpu_unlock(krcp, flags); @@ -3724,7 +3739,7 @@ void __init kfree_rcu_scheduler_running(void) raw_spin_lock_irqsave(&krcp->lock, flags); if (need_offload_krc(krcp)) - schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } } From 5d1a3986c20e8b28742e804a4a86c9942ac4e8ae Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 22 Jun 2022 22:51:02 +0000 Subject: [PATCH 90/98] UPSTREAM: rcu/kfree: Fix kfree_rcu_shrink_count() return value As per the comments in include/linux/shrinker.h, .count_objects callback should return the number of freeable items, but if there are no objects to free, SHRINK_EMPTY should be returned. The only time 0 is returned should be when we are unable to determine the number of objects, or the cache should be skipped for another reason. Signed-off-by: Joel Fernandes (Google) Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney (cherry picked from commit 38269096351806bf7315f971c53205b676ada259) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I5cb380fceaccc85971a47773d9058f0ea044c6dd Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4332178 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul (cherry picked from commit 3243f1e22bf915c9b805a96cc4a8cbc03ed5d7a8) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ed2dce3f0176..5f0510e08c5b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3696,7 +3696,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) atomic_set(&krcp->backoff_page_cache_fill, 1); } - return count; + return count == 0 ? SHRINK_EMPTY : count; } static unsigned long From 16ea06fe441069eee38d16662975e8cd86572d69 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 29 Nov 2022 16:58:21 +0100 Subject: [PATCH 91/98] UPSTREAM: rcu/kvfree: Move need_offload_krc() out of krcp->lock The need_offload_krc() function currently holds the krcp->lock in order to safely check krcp->head. This commit removes the need for this lock in that function by updating the krcp->head pointer using WRITE_ONCE() macro so that readers can carry out lockless loads of that pointer. Bug: 258241771 Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney (cherry picked from commit 8fc5494ad5face62747a3937db66b00db1e5d80b) Signed-off-by: Qais Yousef Change-Id: Iddde5ec15e8574216abc95d8c64efa5c66868508 --- kernel/rcu/tree.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5f0510e08c5b..aa84ee9f1830 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3446,7 +3446,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // objects queued on the linked list. if (!krwp->head_free) { krwp->head_free = krcp->head; - krcp->head = NULL; + WRITE_ONCE(krcp->head, NULL); } WRITE_ONCE(krcp->count, 0); @@ -3460,6 +3460,8 @@ static void kfree_rcu_monitor(struct work_struct *work) } } + raw_spin_unlock_irqrestore(&krcp->lock, flags); + // If there is nothing to detach, it means that our job is // successfully done here. 
In case of having at least one // of the channels that is still busy we should rearm the @@ -3467,8 +3469,6 @@ static void kfree_rcu_monitor(struct work_struct *work) // still in progress. if (need_offload_krc(krcp)) schedule_delayed_monitor_work(krcp); - - raw_spin_unlock_irqrestore(&krcp->lock, flags); } static enum hrtimer_restart @@ -3647,7 +3647,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) head->func = func; head->next = krcp->head; - krcp->head = head; + WRITE_ONCE(krcp->head, head); success = true; } @@ -3732,15 +3732,12 @@ static struct shrinker kfree_rcu_shrinker = { void __init kfree_rcu_scheduler_running(void) { int cpu; - unsigned long flags; for_each_possible_cpu(cpu) { struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); - raw_spin_lock_irqsave(&krcp->lock, flags); if (need_offload_krc(krcp)) schedule_delayed_monitor_work(krcp); - raw_spin_unlock_irqrestore(&krcp->lock, flags); } } From 4adb60810c9f4092f53e5ddc4c1f6a0bf0a97838 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 3 Mar 2023 21:38:51 +0000 Subject: [PATCH 92/98] ANDROID: rcu: Add a minimum time for marking boot as completed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On many systems, a great deal of boot (in userspace) happens after the kernel thinks the boot has completed. It is difficult to determine if the system has really booted from the kernel side. Some features like lazy-RCU can risk slowing down boot time if, say, a callback has been added that the boot synchronously depends on. Further expedited callbacks can get unexpedited way earlier than it should be, thus slowing down boot (as shown in the data below). For these reasons, this commit adds a config option 'CONFIG_RCU_BOOT_END_DELAY' and a boot parameter rcupdate.boot_end_delay. 
Userspace can also make RCU's view of the system as booted, by writing the time in milliseconds to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay Or even just writing a value of 0 to this sysfs node. However, under no circumstance will the boot be allowed to end earlier than just before init is launched. The default value of CONFIG_RCU_BOOT_END_DELAY is chosen as 15s. This suits ChromeOS and also a PREEMPT_RT system below very well, which need no config or parameter changes, and just a simple application of this patch. A system designer can also choose a specific value here to keep RCU from marking boot completion. As noted earlier, RCU's perspective of the system as booted will not be marked until at least android_rcu_boot_end_delay milliseconds have passed or an update is made via writing a small value (or 0) in milliseconds to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay. One side-effect of this patch is, there is a risk that a real-time workload launched just after the kernel boots will suffer interruptions due to expedited RCU, which previously ended just before init was launched. However, to mitigate such an issue (however unlikely), the user should either tune CONFIG_RCU_BOOT_END_DELAY to a smaller value than 15 seconds or write a value of 0 to /sys/module/rcupdate/parameters/android_rcu_boot_end_delay, once userspace boots, and before launching the real-time workload. Qiuxu also noted impressive boot-time improvements with an earlier version of this patch. An excerpt from the data he shared: 1) Testing environment: OS : CentOS Stream 8 (non-RT OS) Kernel : v6.2 Machine : Intel Cascade Lake server (2 sockets, each with 44 logical threads) Qemu args : -cpu host -enable-kvm, -smp 88,threads=2,sockets=2, … 2) OS boot time definition: The time from the start of the kernel boot to the shell command line prompt is shown from the console. [ Different people may have different OS boot time definitions. 
] 3) Measurement method (very rough method): A timer in the kernel periodically prints the boot time every 100ms. As soon as the shell command line prompt is shown from the console, we record the boot time printed by the timer, then the printed boot time is the OS boot time. 4) Measured OS boot time (in seconds) a) Measured 10 times w/o this patch: 8.7s, 8.4s, 8.6s, 8.2s, 9.0s, 8.7s, 8.8s, 9.3s, 8.8s, 8.3s The average OS boot time was: ~8.7s b) Measured 10 times w/ this patch: 8.5s, 8.2s, 7.6s, 8.2s, 8.7s, 8.2s, 7.8s, 8.2s, 9.3s, 8.4s The average OS boot time was: ~8.3s. (CHROMIUM tag rationale: Submitted upstream but got lots of pushback as it may harm a PREEMPT_RT system -- the concern is VERY theoretical and this improves things for ChromeOS. Plus we are not a PREEMPT_RT system. So I am strongly suggesting this mostly simple change for ChromeOS.) Bug: 258241771 Bug: 268129466 Test: boot Tested-by: Qiuxu Zhuo Change-Id: Ibd262189d7f92dbcc57f1508efe90fcfba95a6cc Signed-off-by: Joel Fernandes (Google) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4350228 Commit-Queue: Joel Fernandes Commit-Queue: Vineeth Pillai Tested-by: Vineeth Pillai Tested-by: Joel Fernandes Reviewed-by: Vineeth Pillai (cherry picked from commit 7968079ec77b320ee9d4115fe13048a8f7afbc02) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style. Prefix boot param with android_] Signed-off-by: Qais Yousef --- .../admin-guide/kernel-parameters.txt | 15 ++++ kernel/rcu/Kconfig | 21 +++++ kernel/rcu/update.c | 76 ++++++++++++++++++- 3 files changed, 110 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ec80358a9faf..5d10bc873b37 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4881,6 +4881,21 @@ rcutorture.verbose= [KNL] Enable additional printk() statements. 
+ rcupdate.android_rcu_boot_end_delay= [KNL] + Minimum time in milliseconds from the start of boot + that must elapse before the boot sequence can be marked + complete from RCU's perspective, after which RCU's + behavior becomes more relaxed. The default value is also + configurable via CONFIG_RCU_BOOT_END_DELAY. + Userspace can also mark the boot as completed + sooner by writing the time in milliseconds, say once + userspace considers the system as booted, to: + /sys/module/rcupdate/parameters/android_rcu_boot_end_delay + Or even just writing a value of 0 to this sysfs node. + The sysfs node can also be used to extend the delay + to be larger than the default, assuming the marking + of boot complete has not yet occurred. + rcupdate.rcu_cpu_stall_ftrace_dump= [KNL] Dump ftrace buffer after reporting RCU CPU stall warning. diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 8e31e315a6f5..f0d0df3beedc 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -282,4 +282,25 @@ config RCU_LAZY To save power, batch RCU callbacks and flush after delay, memory pressure, or callback list growing too big. +config RCU_BOOT_END_DELAY + int "Minimum time before RCU may consider in-kernel boot as completed" + range 0 120000 + default 20000 + help + Default value of the minimum time in milliseconds from the start of boot + that must elapse before the boot sequence can be marked complete from RCU's + perspective, after which RCU's behavior becomes more relaxed. + Userspace can also mark the boot as completed sooner than this default + by writing the time in milliseconds, say once userspace considers + the system as booted, to: /sys/module/rcupdate/parameters/rcu_boot_end_delay. + Or even just writing a value of 0 to this sysfs node. The sysfs node can + also be used to extend the delay to be larger than the default, assuming + the marking of boot completion has not yet occurred. 
+ + The actual delay for RCU's view of the system to be marked as booted can be + higher than this value if the kernel takes a long time to initialize but it + will never be smaller than this value. + + Accept the default if unsure. + endmenu # "RCU Subsystem" diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a90458c7b4f2..699344c50506 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -43,6 +43,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS @@ -224,13 +225,51 @@ void rcu_unexpedite_gp(void) } EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); +/* + * Minimum time in milliseconds from the start boot until RCU can consider + * in-kernel boot as completed. This can also be tuned at runtime to end the + * boot earlier, by userspace init code writing the time in milliseconds (even + * 0) to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay. The sysfs + * node can also be used to extend the delay to be larger than the default, + * assuming the marking of boot complete has not yet occurred. + */ +static int android_rcu_boot_end_delay = CONFIG_RCU_BOOT_END_DELAY; + static bool rcu_boot_ended __read_mostly; +static bool rcu_boot_end_called __read_mostly; +static DEFINE_MUTEX(rcu_boot_end_lock); /* - * Inform RCU of the end of the in-kernel boot sequence. + * Inform RCU of the end of the in-kernel boot sequence. The boot sequence will + * not be marked ended until at least android_rcu_boot_end_delay milliseconds + * have passed. */ -void rcu_end_inkernel_boot(void) +void rcu_end_inkernel_boot(void); +static void rcu_boot_end_work_fn(struct work_struct *work) { + rcu_end_inkernel_boot(); +} +static DECLARE_DELAYED_WORK(rcu_boot_end_work, rcu_boot_end_work_fn); + +/* Must be called with rcu_boot_end_lock held. 
*/ +static void rcu_end_inkernel_boot_locked(void) +{ + rcu_boot_end_called = true; + + if (rcu_boot_ended) + return; + + if (android_rcu_boot_end_delay) { + u64 boot_ms = div_u64(ktime_get_boot_fast_ns(), 1000000UL); + + if (boot_ms < android_rcu_boot_end_delay) { + schedule_delayed_work(&rcu_boot_end_work, + msecs_to_jiffies(android_rcu_boot_end_delay - boot_ms)); + return; + } + } + + cancel_delayed_work(&rcu_boot_end_work); rcu_unexpedite_gp(); rcu_async_relax(); if (rcu_normal_after_boot) @@ -238,6 +277,39 @@ void rcu_end_inkernel_boot(void) rcu_boot_ended = true; } +void rcu_end_inkernel_boot(void) +{ + mutex_lock(&rcu_boot_end_lock); + rcu_end_inkernel_boot_locked(); + mutex_unlock(&rcu_boot_end_lock); +} + +static int param_set_rcu_boot_end(const char *val, const struct kernel_param *kp) +{ + uint end_ms; + int ret = kstrtouint(val, 0, &end_ms); + + if (ret) + return ret; + /* + * rcu_end_inkernel_boot() should be called at least once during init + * before we can allow param changes to end the boot. + */ + mutex_lock(&rcu_boot_end_lock); + android_rcu_boot_end_delay = end_ms; + if (!rcu_boot_ended && rcu_boot_end_called) { + rcu_end_inkernel_boot_locked(); + } + mutex_unlock(&rcu_boot_end_lock); + return ret; +} + +static const struct kernel_param_ops rcu_boot_end_ops = { + .set = param_set_rcu_boot_end, + .get = param_get_uint, +}; +module_param_cb(android_rcu_boot_end_delay, &rcu_boot_end_ops, &android_rcu_boot_end_delay, 0644); + /* * Let rcutorture know when it is OK to turn it up to eleven. */ From 37b02c190cbf74022651e3f5f8240c13796ad37c Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Sun, 3 Dec 2023 01:12:52 +0000 Subject: [PATCH 93/98] FROMLIST: rcu: Provide a boot time parameter to control lazy RCU To allow more flexible arrangements while still provide a single kernel for distros, provide a boot time parameter to enable/disable lazy RCU. 
Specify: rcutree.enable_rcu_lazy=[y|1|n|0] Which also requires rcu_nocbs=all at boot time to enable/disable lazy RCU. To disable it by default at build time when CONFIG_RCU_LAZY=y, the new CONFIG_RCU_LAZY_DEFAULT_OFF can be used. Bug: 258241771 Signed-off-by: Qais Yousef (Google) Tested-by: Andrea Righi Signed-off-by: Paul E. McKenney Link: https://lore.kernel.org/lkml/20231203011252.233748-1-qyousef@layalina.io/ [Fix trivial conflicts rejecting newer code that doesn't exist on 5.15] Signed-off-by: Qais Yousef Change-Id: Ib5585ae717a2ba7749f2802101b785c4e5de8a90 --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ kernel/rcu/Kconfig | 12 ++++++++++++ kernel/rcu/tree.c | 7 ++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5d10bc873b37..6d653ef073ad 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4637,6 +4637,11 @@ rcu_node tree with an eye towards determining why a new grace period has not yet started. + rcutree.enable_rcu_lazy= [KNL] + To save power, batch RCU callbacks and flush after + delay, memory pressure or callback list growing too + big. + rcuscale.gp_async= [KNL] Measure performance of asynchronous grace-period primitives such as call_rcu(). diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index f0d0df3beedc..4bc5b79ce1df 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -281,6 +281,18 @@ config RCU_LAZY help To save power, batch RCU callbacks and flush after delay, memory pressure, or callback list growing too big. + Requires rcu_nocbs=all to be set. + + Use rcutree.enable_rcu_lazy=0 to turn it off at boot time. + +config RCU_LAZY_DEFAULT_OFF + bool "Turn RCU lazy invocation off by default" + depends on RCU_LAZY + default n + help + Allows building the kernel with CONFIG_RCU_LAZY=y yet keep it default + off. 
Boot time param rcutree.enable_rcu_lazy=1 can be used to switch + it back on. config RCU_BOOT_END_DELAY int "Minimum time before RCU may consider in-kernel boot as completed" diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index aa84ee9f1830..ed0756afdf20 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3044,6 +3044,9 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) } #ifdef CONFIG_RCU_LAZY +static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF); +module_param(enable_rcu_lazy, bool, 0444); + /** * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and * flush all lazy callbacks (including the new one) to the main ->cblist while @@ -3069,6 +3072,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) return __call_rcu_common(head, func, false); } EXPORT_SYMBOL_GPL(call_rcu_hurry); +#else +#define enable_rcu_lazy false #endif /** @@ -3117,7 +3122,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry); */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); + __call_rcu_common(head, func, enable_rcu_lazy); } EXPORT_SYMBOL_GPL(call_rcu); From d38091b4ff772245bf965f21172024b3660c3be0 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 23 May 2023 19:22:19 +0000 Subject: [PATCH 94/98] ANDROID: Enable CONFIG_LAZY_RCU in arm64 gki_defconfig It is still disabled by default. Must specify rcutree.android_enable_rcu_lazy and rcu_nocbs=all in boot time parameter to actually enable it. 
Bug: 258241771 Change-Id: I11c920aa5edde2fc42ab54245cd198eb8cb47616 Signed-off-by: Qais Yousef --- arch/arm64/configs/gki_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 733f1f2da58a..1a84753c54d9 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -15,6 +15,8 @@ CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_BOOST=y CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_LAZY=y +CONFIG_RCU_LAZY_DEFAULT_OFF=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=m From ae67f18944e363bcf4822fd0a39fcc41c911d8c9 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Fri, 19 Jan 2024 10:52:57 +0000 Subject: [PATCH 95/98] ANDROID: Enable CONFIG_LAZY_RCU in x86 gki_defconfig It is still disabled by default. Must specify rcutree.android_enable_rcu_lazy and rcu_nocbs=all in boot time parameter to actually enable it. Bug: 258241771 Change-Id: Ic9e15b846d58ffa3d5dd81842c568da79352ff2d Signed-off-by: Qais Yousef --- arch/x86/configs/gki_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 2e3d924152bc..422d4ba05d4f 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -17,6 +17,8 @@ CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_BOOST=y CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_LAZY=y +CONFIG_RCU_LAZY_DEFAULT_OFF=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=m From ae44e8dac85115b05699c06e33ab337c0e82cf1e Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Fri, 19 Apr 2024 14:41:35 -0700 Subject: [PATCH 96/98] ANDROID: 16K: Only madvise padding from dynamic linker context Only preform padding advise from the execution context on bionic's dynamic linker. This ensures that madvise() doesn't have unwanted side effects. Also rearrange the order of fail checks in madvise_vma_pad_pages() in order of ascending cost. 
Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I3e05b8780c6eda78007f86b613f8c11dd18ac28f Signed-off-by: Kalesh Singh --- mm/pgsize_migration.c | 75 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index b7264f49a9cb..aecc109524c3 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,56 @@ static __always_inline bool str_has_suffix(const char *str, const char *suffix) return !strncmp(str + str_len - suffix_len, suffix, suffix_len); } +/* + * The dynamic linker, or interpreter, operates within the process context + * of the binary that necessitated dynamic linking. + * + * Consequently, process context identifiers; like PID, comm, ...; cannot + * be used to differentiate whether the execution context belongs to the + * dynamic linker or not. + * + * linker_ctx() deduces whether execution is currently in the dynamic linker's + * context by correlating the current userspace instruction pointer with the + * VMAs of the current task. + * + * Returns true if in linker context, otherwise false. + * + * Caller must hold mmap lock in read mode. 
+ */ +static inline bool linker_ctx(void) +{ + struct pt_regs *regs = task_pt_regs(current); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct file *file; + + if (!regs) + return false; + + vma = find_vma(mm, instruction_pointer(regs)); + + /* Current execution context, the VMA must be present */ + BUG_ON(!vma); + + file = vma->vm_file; + if (!file) + return false; + + if ((vma->vm_flags & VM_EXEC)) { + char buf[64]; + const int bufsize = sizeof(buf); + char *path; + + memset(buf, 0, bufsize); + path = d_path(&file->f_path, buf, bufsize); + + if (!strcmp(path, "/system/bin/linker64")) + return true; + } + + return false; +} + /* * Saves the number of padding pages for an ELF segment mapping * in vm_flags. @@ -146,6 +197,7 @@ static __always_inline bool str_has_suffix(const char *str, const char *suffix) * 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES. * 5) The VMA is a regular file backed VMA (filemap_fault) * 6) The file backing the VMA is a shared library (*.so) + * 7) The madvise was requested by bionic's dynamic linker. */ void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -155,18 +207,9 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, if (!is_pgsize_migration_enabled()) return; - /* Only handle this for file backed VMAs */ - if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) - return; - - - /* Limit this to only shared libraries (*.so) */ - if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so")) - return; - /* * If the madvise range is it at the end of the file save the number of - * pages in vm_flags (only need 4 bits are needed for 16kB aligned ELFs). + * pages in vm_flags (only 4 bits are needed for up to 64kB aligned ELFs).
*/ if (start <= vma->vm_start || end != vma->vm_end) return; @@ -176,6 +219,18 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES) return; + /* Only handle this for file backed VMAs */ + if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) + return; + + /* Limit this to only shared libraries (*.so) */ + if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so")) + return; + + /* Only bionic's dynamic linker needs to hint padding pages. */ + if (!linker_ctx()) + return; + vma_set_pad_pages(vma, nr_pad_pages); } From 19d6e7eb47dc0aeffc4a3b50ad9b65deb594a211 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 25 Apr 2024 09:59:08 -0700 Subject: [PATCH 97/98] ANDROID: 16K: madvise_vma_pad_pages: Remove filemap_fault check Some file systems like F2FS use a custom filemap_fault ops. Remove this check, as checking vm_file is sufficient. Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: Id6a584d934f06650c0a95afd1823669fc77ba2c2 Signed-off-by: Kalesh Singh --- mm/pgsize_migration.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index aecc109524c3..f148918ee8f7 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -195,7 +195,7 @@ static inline bool linker_ctx(void) * 2) The range ends at the end address of the VMA * 3) The range starts at an address greater than the start address of the VMA * 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES. - * 5) The VMA is a regular file backed VMA (filemap_fault) + * 5) The VMA is a file backed VMA. * 6) The file backing the VMA is a shared library (*.so) * 7) The madvise was requested by bionic's dynamic linker. 
*/ @@ -220,7 +220,7 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, return; /* Only handle this for file backed VMAs */ - if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) + if (!vma->vm_file) return; /* Limit this to only shared libraries (*.so) */ From d83231efe4bfcdee684acd7eb4f1cada88517b13 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 22 Apr 2024 14:24:59 -0700 Subject: [PATCH 98/98] ANDROID: 16K: Handle pad VMA splits and merges In some cases a VMA with padding representation may be split, and therefore the padding flags must be updated accordingly. There are 3 cases to handle: Given: | DDDDPPPP | where: - D represents 1 page of data; - P represents 1 page of padding; - | represents the boundaries (start/end) of the VMA 1) Split exactly at the padding boundary | DDDDPPPP | --> | DDDD | PPPP | - Remove padding flags from the first VMA. - The second VMA is all padding 2) Split within the padding area | DDDDPPPP | --> | DDDDPP | PP | - Subtract the length of the second VMA from the first VMA's padding. - The second VMA is all padding, adjust its padding length (flags) 3) Split within the data area | DDDDPPPP | --> | DD | DDPPPP | - Remove padding flags from the first VMA. - The second VMA has the same padding as before the split. To simplify the semantics, merging of padding VMAs is not allowed. If a split produces a VMA that is entirely padding, show_[s]maps() only outputs the padding VMA entry (as the data entry is of length 0).
Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: Ie2628ced5512e2c7f8af25fabae1f38730c8bb1a Signed-off-by: Kalesh Singh --- fs/proc/task_mmu.c | 7 +++- include/linux/pgsize_migration.h | 34 +++++++++++++++ mm/mlock.c | 3 +- mm/mmap.c | 7 +++- mm/mprotect.c | 4 +- mm/pgsize_migration.c | 72 +++++++++++++++++++++++++++++++- 6 files changed, 121 insertions(+), 6 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 095aa7c80ee1..0b7f73653ae3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -348,7 +348,8 @@ static int show_map(struct seq_file *m, void *v) struct vm_area_struct *pad_vma = get_pad_vma(v); struct vm_area_struct *vma = get_data_vma(v); - show_map_vma(m, vma); + if (vma_pages(vma)) + show_map_vma(m, vma); show_map_pad_vma(vma, pad_vma, m, show_map_vma); @@ -851,6 +852,9 @@ static int show_smap(struct seq_file *m, void *v) memset(&mss, 0, sizeof(mss)); + if (!vma_pages(vma)) + goto show_pad; + smap_gather_stats(vma, &mss, 0); show_map_vma(m, vma); @@ -869,6 +873,7 @@ static int show_smap(struct seq_file *m, void *v) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); +show_pad: show_map_pad_vma(vma, pad_vma, m, (show_pad_vma_fn)show_smap); return 0; diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index 7ab0f288bcf9..5c47ec28ea7d 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -61,6 +61,9 @@ extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma); extern void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, struct seq_file *m, show_pad_vma_fn func); + +extern void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int new_below); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages) @@ -92,10 +95,41 @@ static inline void 
show_map_pad_vma(struct vm_area_struct *vma, struct seq_file *m, show_pad_vma_fn func) { } + +static inline void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int new_below) +{ +} #endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ static inline unsigned long vma_data_pages(struct vm_area_struct *vma) { return vma_pages(vma) - vma_pad_pages(vma); } + +/* + * Sets the correct padding bits / flags for a VMA split. + */ +static inline unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma, + unsigned long newflags) +{ + if (newflags & VM_PAD_MASK) + return (newflags & ~VM_PAD_MASK) | (vma->vm_flags & VM_PAD_MASK); + else + return newflags; +} + +/* + * Merging of padding VMAs is uncommon, as padding is only allowed + * from the linker context. + * + * To simplify the semantics, adjacent VMAs with padding are not + * allowed to merge. + */ +static inline bool is_mergable_pad_vma(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + /* Padding VMAs cannot be merged with other padding or real VMAs */ + return !((vma->vm_flags | vm_flags) & VM_PAD_MASK); +} #endif /* _LINUX_PAGE_SIZE_MIGRATION_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 0cc7fe053755..eec2418f3336 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -547,7 +548,7 @@ success: */ if (lock) - vma->vm_flags = newflags; + vma->vm_flags = vma_pad_fixup_flags(vma, newflags); else munlock_vma_pages_range(vma, start, end); diff --git a/mm/mmap.c b/mm/mmap.c index e3a10b3cc6be..e78cf663e559 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1053,6 +1054,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma, return 0; if (!anon_vma_name_eq(anon_vma_name(vma), anon_name)) return 0; + if (!is_mergable_pad_vma(vma, vm_flags)) + return 0; return 1; } @@ -2778,8 +2781,10 @@ int __split_vma(struct 
mm_struct *mm, struct vm_area_struct *vma, err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); /* Success. */ - if (!err) + if (!err) { + split_pad_vma(vma, new, addr, new_below); return 0; + } /* Clean everything up if vma_adjust failed. */ if (new->vm_ops && new->vm_ops->close) diff --git a/mm/mprotect.c b/mm/mprotect.c index ba53529cdd5e..027cf7c10ce4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -490,7 +491,8 @@ success: * vm_flags and vm_page_prot are protected by the mmap_lock * held in write mode. */ - vma->vm_flags = newflags; + vma->vm_flags = vma_pad_fixup_flags(vma, newflags); + dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot); vma_set_page_prot(vma); diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index f148918ee8f7..79c5e26aa141 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -113,6 +113,7 @@ void vma_set_pad_pages(struct vm_area_struct *vma, if (!is_pgsize_migration_enabled()) return; + vma->vm_flags &= ~VM_PAD_MASK; vma->vm_flags |= (nr_pages << VM_PAD_SHIFT); } @@ -268,10 +269,10 @@ struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) pad->vm_start = VMA_PAD_START(pad); /* Make the pad vma PROT_NONE */ - pad->vm_flags = pad->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); + pad->vm_flags &= ~(VM_READ|VM_WRITE|VM_EXEC); /* Remove padding bits */ - pad->vm_flags = pad->vm_flags & ~VM_PAD_MASK; + pad->vm_flags &= ~VM_PAD_MASK; return pad; } @@ -324,5 +325,72 @@ void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, kfree(pad); kfree(vma); } + +/* + * When splitting a padding VMA there are a couple of cases to handle. 
+ * + * Given: + * + * | DDDDPPPP | + * + * where: + * - D represents 1 page of data; + * - P represents 1 page of padding; + * - | represents the boundaries (start/end) of the VMA + * + * + * 1) Split exactly at the padding boundary + * + * | DDDDPPPP | --> | DDDD | PPPP | + * + * - Remove padding flags from the first VMA. + * - The second VMA is all padding + * + * 2) Split within the padding area + * + * | DDDDPPPP | --> | DDDDPP | PP | + * + * - Subtract the length of the second VMA from the first VMA's padding. + * - The second VMA is all padding, adjust its padding length (flags) + * + * 3) Split within the data area + * + * | DDDDPPPP | --> | DD | DDPPPP | + * + * - Remove padding flags from the first VMA. + * - The second VMA has the same padding as before the split. + */ +void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int new_below) +{ + unsigned long nr_pad_pages = vma_pad_pages(vma); + unsigned long nr_vma2_pages; + struct vm_area_struct *first; + struct vm_area_struct *second; + + if (!nr_pad_pages) + return; + + if (new_below) { + first = new; + second = vma; + } else { + first = vma; + second = new; + } + + nr_vma2_pages = vma_pages(second); + + if (nr_vma2_pages == nr_pad_pages) { /* Case 1 */ + first->vm_flags &= ~VM_PAD_MASK; + vma_set_pad_pages(second, nr_pad_pages); + } else if (nr_vma2_pages < nr_pad_pages) { /* Case 2 */ + vma_set_pad_pages(first, nr_pad_pages - nr_vma2_pages); + vma_set_pad_pages(second, nr_vma2_pages); + } else { /* Case 3 */ + first->vm_flags &= ~VM_PAD_MASK; + vma_set_pad_pages(second, nr_pad_pages); + } +} #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */