From f56ddffe05c0ed9c95dad3accaff1be1cd0d6208 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:07 -0700 Subject: [PATCH 01/98] ANDROID: block: Use pr_info() instead of printk(KERN_INFO ...) Switch to the modern style of printing kernel messages. Use %u instead of %d to print unsigned integers. The pr_fmt() format is added on top of the file to include __func__ in the pr_info() calls. Bug: 308663717 Bug: 319125789 Change-Id: Iea0a19c8221fe1a2fcd3f26c5ffd0c3b69935eec Signed-off-by: Bart Van Assche [jyescas@google.com: define pr_fmt(fmt) to include __func__ in the output] Signed-off-by: Juan Yescas --- block/blk-settings.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 73a80895e3ae..9e757812ad02 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -2,6 +2,9 @@ /* * Functions related to setting various queue properties from drivers */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include #include #include @@ -127,8 +130,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto if ((max_hw_sectors << 9) < PAGE_SIZE) { max_hw_sectors = 1 << (PAGE_SHIFT - 9); - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_hw_sectors); + pr_info("set to minimum %u\n", max_hw_sectors); } max_hw_sectors = round_down(max_hw_sectors, @@ -243,8 +245,7 @@ void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments { if (!max_segments) { max_segments = 1; - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_segments); + pr_info("set to minimum %u\n", max_segments); } q->limits.max_segments = max_segments; @@ -280,8 +281,7 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { if (max_size < PAGE_SIZE) { max_size = PAGE_SIZE; - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_size); + pr_info("set to minimum %u\n", max_size); } /* see blk_queue_virt_boundary() for 
the explanation */ @@ -701,8 +701,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) { if (mask < PAGE_SIZE - 1) { mask = PAGE_SIZE - 1; - printk(KERN_INFO "%s: set to minimum %lx\n", - __func__, mask); + pr_info("set to minimum %lx\n", mask); } q->limits.seg_boundary_mask = mask; From 025c278e84315c1b1b7ecb99a2a419c7a949c0cc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:08 -0700 Subject: [PATCH 02/98] ANDROID: block: Prepare for supporting sub-page limits Introduce variables that represent the lower configuration bounds. This patch does not change any functionality. Bug: 308663717 Bug: 319125789 Change-Id: Ia39fbe29a0711caba841a7b44d462608216841ec Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- block/blk-settings.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 9e757812ad02..536fa40d1331 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -126,10 +126,11 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) { struct queue_limits *limits = &q->limits; + unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT; unsigned int max_sectors; - if ((max_hw_sectors << 9) < PAGE_SIZE) { - max_hw_sectors = 1 << (PAGE_SHIFT - 9); + if (max_hw_sectors < min_max_hw_sectors) { + max_hw_sectors = min_max_hw_sectors; pr_info("set to minimum %u\n", max_hw_sectors); } @@ -279,8 +280,10 @@ EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments); **/ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { - if (max_size < PAGE_SIZE) { - max_size = PAGE_SIZE; + unsigned int min_max_segment_size = PAGE_SIZE; + + if (max_size < min_max_segment_size) { + max_size = min_max_segment_size; pr_info("set to minimum %u\n", max_size); } From 3f6018f1b624c48f3a4d29dbe4ad1390901abaf7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: 
Mon, 12 Jun 2023 13:33:09 -0700 Subject: [PATCH 03/98] ANDROID: block: Support configuring limits below the page size Allow block drivers to configure the following: * Maximum number of hardware sectors values smaller than PAGE_SIZE >> SECTOR_SHIFT. For PAGE_SIZE = 4096 this means that values below 8 become supported. * A maximum segment size below the page size. This is most useful for page sizes above 4096 bytes. The blk_sub_page_segments static branch will be used in later patches to prevent that performance of block drivers that support segments >= PAGE_SIZE and max_hw_sectors >= PAGE_SIZE >> SECTOR_SHIFT would be affected. This patch may change the behavior of existing block drivers from not working into working. If a block driver calls blk_queue_max_hw_sectors() or blk_queue_max_segment_size(), this is usually done to configure the maximum supported limits. An attempt to configure a limit below what is supported by the block layer causes the block layer to select a larger value. If that value is not supported by the block driver, this may cause other data to be transferred than requested, a kernel crash or other undesirable behavior. Keeps the ABI stable by taking advantage of a hole in the structure! 
Bug: 308663717 Bug: 319125789 Bug: 324152549 Change-Id: I4a7b605f0f0d82dde0b4703496c7314064f48acb Signed-off-by: Bart Van Assche [jyescas@google.com: disable subpage limits in block/blk-sysfs.c instead block/blk-core.c because the function blk_free_queue() is not defined in 5.15 kernel] Signed-off-by: Juan Yescas --- android/abi_gki_aarch64.stg | 7 +++++ block/blk-settings.c | 60 +++++++++++++++++++++++++++++++++++++ block/blk-sysfs.c | 2 ++ block/blk.h | 9 ++++++ include/linux/blkdev.h | 5 ++++ 5 files changed, 83 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index d6a079dc565b..1ad3e32c0402 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -168781,6 +168781,12 @@ member { type_id: 0xa7c362b0 offset: 1088 } +member { + id: 0x4c1b044f + name: "sub_page_limits" + type_id: 0x6d7f5ff6 + offset: 840 +} member { id: 0xedd64f59 name: "sub_reg_offsets" @@ -227137,6 +227143,7 @@ struct_union { member_id: 0x06473753 member_id: 0x1bdd5453 member_id: 0x26582f94 + member_id: 0x4c1b044f member_id: 0xaf3e33dd member_id: 0x2d081f94 member_id: 0xd671ce1e diff --git a/block/blk-settings.c b/block/blk-settings.c index 536fa40d1331..e416616bfc5a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -21,6 +21,11 @@ #include "blk.h" #include "blk-wbt.h" +/* Protects blk_nr_sub_page_limit_queues and blk_sub_page_limits changes. 
*/ +static DEFINE_MUTEX(blk_sub_page_limit_lock); +static uint32_t blk_nr_sub_page_limit_queues; +DEFINE_STATIC_KEY_FALSE(blk_sub_page_limits); + void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) { q->rq_timeout = timeout; @@ -60,6 +65,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->misaligned = 0; lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; + lim->sub_page_limits = false; } EXPORT_SYMBOL(blk_set_default_limits); @@ -104,6 +110,50 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) } EXPORT_SYMBOL(blk_queue_bounce_limit); +/** + * blk_enable_sub_page_limits - enable support for limits below the page size + * @lim: request queue limits for which to enable support of these features. + * + * Enable support for max_segment_size values smaller than PAGE_SIZE and for + * max_hw_sectors values below PAGE_SIZE >> SECTOR_SHIFT. Support for these + * features is not enabled all the time because of the runtime overhead of these + * features. + */ +static void blk_enable_sub_page_limits(struct queue_limits *lim) +{ + if (lim->sub_page_limits) + return; + + lim->sub_page_limits = true; + + mutex_lock(&blk_sub_page_limit_lock); + if (++blk_nr_sub_page_limit_queues == 1) + static_branch_enable(&blk_sub_page_limits); + mutex_unlock(&blk_sub_page_limit_lock); +} + +/** + * blk_disable_sub_page_limits - disable support for limits below the page size + * @lim: request queue limits for which to enable support of these features. + * + * max_segment_size values smaller than PAGE_SIZE and for max_hw_sectors values + * below PAGE_SIZE >> SECTOR_SHIFT. Support for these features is not enabled + * all the time because of the runtime overhead of these features. 
+ */ +void blk_disable_sub_page_limits(struct queue_limits *lim) +{ + if (!lim->sub_page_limits) + return; + + lim->sub_page_limits = false; + + mutex_lock(&blk_sub_page_limit_lock); + WARN_ON_ONCE(blk_nr_sub_page_limit_queues <= 0); + if (--blk_nr_sub_page_limit_queues == 0) + static_branch_disable(&blk_sub_page_limits); + mutex_unlock(&blk_sub_page_limit_lock); +} + /** * blk_queue_max_hw_sectors - set max sectors for a request for this queue * @q: the request queue for the device @@ -129,6 +179,11 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto unsigned int min_max_hw_sectors = PAGE_SIZE >> SECTOR_SHIFT; unsigned int max_sectors; + if (max_hw_sectors < min_max_hw_sectors) { + blk_enable_sub_page_limits(limits); + min_max_hw_sectors = 1; + } + if (max_hw_sectors < min_max_hw_sectors) { max_hw_sectors = min_max_hw_sectors; pr_info("set to minimum %u\n", max_hw_sectors); @@ -282,6 +337,11 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { unsigned int min_max_segment_size = PAGE_SIZE; + if (max_size < min_max_segment_size) { + blk_enable_sub_page_limits(&q->limits); + min_max_segment_size = SECTOR_SIZE; + } + if (max_size < min_max_segment_size) { max_size = min_max_segment_size; pr_info("set to minimum %u\n", max_size); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5a47708ff233..97d31d539ad7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -811,6 +811,8 @@ static void blk_release_queue(struct kobject *kobj) blk_queue_free_zone_bitmaps(q); + blk_disable_sub_page_limits(&q->limits); + if (queue_is_mq(q)) blk_mq_release(q); diff --git a/block/blk.h b/block/blk.h index f10c848d91a3..fa606c0bb7aa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -16,6 +16,7 @@ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; +DECLARE_STATIC_KEY_FALSE(blk_sub_page_limits); struct internal_request_queue { struct request_queue q; @@ -61,6 +62,14 @@ struct blk_flush_queue 
*blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); +static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim) +{ + return static_branch_unlikely(&blk_sub_page_limits) && + lim->sub_page_limits; +} + +void blk_disable_sub_page_limits(struct queue_limits *q); + void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5f771925d439..f8198834edec 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -328,6 +328,11 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; + +#ifndef __GENKSYMS__ + bool sub_page_limits; +#endif + enum blk_zoned_model zoned; ANDROID_KABI_RESERVE(1); From e99e7de8a6e4fb0c1b51ff1abc6f9042a4c7ae74 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:11 -0700 Subject: [PATCH 04/98] ANDROID: block: Support submitting passthrough requests with small segments If the segment size is smaller than the page size there may be multiple segments per bvec even if a bvec only contains a single page. Hence this patch. 
Bug: 308663717 Bug: 319125789 Change-Id: I446aab83a2c519cb3c42d5d8ffd814dcc34274d2 Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- block/blk-map.c | 2 +- block/blk.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/block/blk-map.c b/block/blk-map.c index c7f71d83eff1..e08e79142d24 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -486,7 +486,7 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) unsigned int nr_segs = 0; bio_for_each_bvec(bv, bio, iter) - nr_segs++; + nr_segs += blk_segments(&rq->q->limits, bv.bv_len); if (!rq->bio) { blk_rq_bio_prep(rq, bio, nr_segs); diff --git a/block/blk.h b/block/blk.h index fa606c0bb7aa..3f9e259d13f6 100644 --- a/block/blk.h +++ b/block/blk.h @@ -79,6 +79,24 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); +/* Number of DMA segments required to transfer @bytes data. */ +static inline unsigned int blk_segments(const struct queue_limits *limits, + unsigned int bytes) +{ + if (!blk_queue_sub_page_limits(limits)) + return 1; + + { + const unsigned int mss = limits->max_segment_size; + + if (bytes <= mss) + return 1; + if (is_power_of_2(mss)) + return round_up(bytes, mss) >> ilog2(mss); + return (bytes + mss - 1) / mss; + } +} + static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { From bed88e7c4f1b6d86e76c64ca8eed6b0ce8a98e0d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:12 -0700 Subject: [PATCH 05/98] ANDROID: block: Add support for filesystem requests and small segments Add support in the bio splitting code and also in the bio submission code for bios with segments smaller than the page size. 
Bug: 308663717 Bug: 319125789 Change-Id: Iea511675ad96b0c8255c2b87811ad33c3a02c8fa Signed-off-by: Bart Van Assche [jyescas@google.com: the function bio_may_exceed_limits() does not exists in the 5.15 kernel, so the sub page limit code in block/blk.h was moved to the function __blk_queue_split() in block/blk-merge.c] Signed-off-by: Juan Yescas --- block/blk-merge.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 1affc5fd35f0..79bfae7ff221 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -265,7 +265,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (nsegs < max_segs && sectors + (bv.bv_len >> 9) <= max_sectors && bv.bv_offset + bv.bv_len <= PAGE_SIZE) { - nsegs++; + /* single-page bvec optimization */ + nsegs += blk_segments(&q->limits, bv.bv_len); sectors += bv.bv_len >> 9; } else if (bvec_split_segs(q, &bv, &nsegs, §ors, max_segs, max_sectors)) { @@ -333,18 +334,17 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) break; default: /* - * All drivers must accept single-segments bios that are <= - * PAGE_SIZE. This is a quick and dirty check that relies on - * the fact that bi_io_vec[0] is always valid if a bio has data. - * The check might lead to occasional false negatives when bios - * are cloned, but compared to the performance impact of cloned - * bios themselves the loop below doesn't matter anyway. + * Check whether bio splitting should be performed. This check may + * trigger the bio splitting code even if splitting is not necessary. 
*/ if (!q->limits.chunk_sectors && (*bio)->bi_vcnt == 1 && + (!blk_queue_sub_page_limits(&q->limits) || + (*bio)->bi_io_vec->bv_len <= q->limits.max_segment_size) && ((*bio)->bi_io_vec[0].bv_len + (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) { - *nr_segs = 1; + *nr_segs = blk_segments(&q->limits, + (*bio)->bi_io_vec[0].bv_len); break; } split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); @@ -519,7 +519,10 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg)) goto next_bvec; - if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE) + if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE && + (!blk_queue_sub_page_limits(&q->limits) || + bvec.bv_len <= q->limits.max_segment_size)) + /* single-segment bvec optimization */ nsegs += __blk_bvec_map_sg(bvec, sglist, sg); else nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg); From 3ef8e9009c277711444a1ed95cc93e4a45ddb5ef Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:10 -0700 Subject: [PATCH 06/98] ANDROID: block: Make sub_page_limit_queues available in debugfs This new debugfs attribute makes it easier to verify the code that tracks how many queues require limits below the page size. Bug: 308663717 Bug: 319125789 Change-Id: I855113cd3898f9641c7c3f1b4732bf4990fc7d3d Signed-off-by: Bart Van Assche [jyescas@google.com: Wrap #include "blk-mq-debugfs.h" with #ifndef __GENKSYSM__ to avoid ABI CRC changes.] 
Signed-off-by: Juan Yescas --- block/blk-core.c | 5 +++++ block/blk-mq-debugfs.c | 9 +++++++++ block/blk-mq-debugfs.h | 6 ++++++ block/blk-settings.c | 8 ++++++++ block/blk.h | 1 + 5 files changed, 29 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 47667004fdf0..195ac33f19f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -46,7 +46,11 @@ #include #include "blk.h" + #include "blk-mq.h" +#ifndef __GENKSYMS__ +#include "blk-mq-debugfs.h" +#endif #include "blk-mq-sched.h" #include "blk-pm.h" #ifndef __GENKSYMS__ @@ -1786,6 +1790,7 @@ int __init blk_dev_init(void) sizeof(struct internal_request_queue), 0, SLAB_PANIC, NULL); blk_debugfs_root = debugfs_create_dir("block", NULL); + blk_mq_debugfs_init(); return 0; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7023257a133d..5a5cfae86073 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -1013,3 +1013,12 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } + +DEFINE_DEBUGFS_ATTRIBUTE(blk_sub_page_limit_queues_fops, + blk_sub_page_limit_queues_get, NULL, "%llu\n"); + +void blk_mq_debugfs_init(void) +{ + debugfs_create_file("sub_page_limit_queues", 0400, blk_debugfs_root, + NULL, &blk_sub_page_limit_queues_fops); +} diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index a68aa6041a10..997cc17a692a 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -15,6 +15,8 @@ struct blk_mq_debugfs_attr { const struct seq_operations *seq_ops; }; +void blk_mq_debugfs_init(void); + int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); @@ -36,6 +38,10 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else +static inline void 
blk_mq_debugfs_init(void) +{ +} + static inline void blk_mq_debugfs_register(struct request_queue *q) { } diff --git a/block/blk-settings.c b/block/blk-settings.c index e416616bfc5a..932c82e77cf8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -110,6 +110,14 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) } EXPORT_SYMBOL(blk_queue_bounce_limit); +/* For debugfs. */ +int blk_sub_page_limit_queues_get(void *data, u64 *val) +{ + *val = READ_ONCE(blk_nr_sub_page_limit_queues); + + return 0; +} + /** * blk_enable_sub_page_limits - enable support for limits below the page size * @lim: request queue limits for which to enable support of these features. diff --git a/block/blk.h b/block/blk.h index 3f9e259d13f6..ec3713bdfed2 100644 --- a/block/blk.h +++ b/block/blk.h @@ -68,6 +68,7 @@ static inline bool blk_queue_sub_page_limits(const struct queue_limits *lim) lim->sub_page_limits; } +int blk_sub_page_limit_queues_get(void *data, u64 *val); void blk_disable_sub_page_limits(struct queue_limits *q); void blk_freeze_queue(struct request_queue *q); From 0ffd03e67d6dab338fd8c13a916acd1936353925 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:13 -0700 Subject: [PATCH 07/98] ANDROID: scsi_debug: Support configuring the maximum segment size Add a kernel module parameter for configuring the maximum segment size. This patch enables testing SCSI support for segments smaller than the page size. 
Bug: 308663717 Bug: 319125789 Change-Id: Ib645dead4e10d4aaf5a1dfc1d064038153737bc1 Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- drivers/scsi/scsi_debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index e4f6bb3470e5..c8e4479b2144 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -752,6 +752,7 @@ static int sdebug_host_max_queue; /* per host */ static int sdebug_lowest_aligned = DEF_LOWEST_ALIGNED; static int sdebug_max_luns = DEF_MAX_LUNS; static int sdebug_max_queue = SDEBUG_CANQUEUE; /* per submit queue */ +static unsigned int sdebug_max_segment_size = BLK_MAX_SEGMENT_SIZE; static unsigned int sdebug_medium_error_start = OPT_MEDIUM_ERR_ADDR; static int sdebug_medium_error_count = OPT_MEDIUM_ERR_NUM; static atomic_t retired_max_queue; /* if > 0 then was prior max_queue */ @@ -5775,6 +5776,7 @@ module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO); module_param_named(lun_format, sdebug_lun_am_i, int, S_IRUGO | S_IWUSR); module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR); module_param_named(max_queue, sdebug_max_queue, int, S_IRUGO | S_IWUSR); +module_param_named(max_segment_size, sdebug_max_segment_size, uint, S_IRUGO); module_param_named(medium_error_count, sdebug_medium_error_count, int, S_IRUGO | S_IWUSR); module_param_named(medium_error_start, sdebug_medium_error_start, int, @@ -5851,6 +5853,7 @@ MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)"); MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method"); MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); MODULE_PARM_DESC(max_queue, "max number of queued commands (1 to max(def))"); +MODULE_PARM_DESC(max_segment_size, "max bytes in a single segment"); MODULE_PARM_DESC(medium_error_count, "count of sectors to return follow on MEDIUM error"); MODULE_PARM_DESC(medium_error_start, "starting sector number to 
return MEDIUM error"); MODULE_PARM_DESC(ndelay, "response delay in nanoseconds (def=0 -> ignore)"); @@ -7725,6 +7728,7 @@ static int sdebug_driver_probe(struct device *dev) sdebug_driver_template.can_queue = sdebug_max_queue; sdebug_driver_template.cmd_per_lun = sdebug_max_queue; + sdebug_driver_template.max_segment_size = sdebug_max_segment_size; if (!sdebug_clustering) sdebug_driver_template.dma_boundary = PAGE_SIZE - 1; From ff1e211db631c81704bbf0cb1cb2de697b305a4f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 12 Jun 2023 13:33:14 -0700 Subject: [PATCH 08/98] ANDROID: null_blk: Support configuring the maximum segment size Add support for configuring the maximum segment size. Add support for segments smaller than the page size. This patch enables testing segments smaller than the page size with a driver that does not call blk_rq_map_sg(). Bug: 308663717 Bug: 319125789 Change-Id: I74165d83e71201116378c4598c2f9a2ff8c8b623 Signed-off-by: Bart Van Assche Signed-off-by: Juan Yescas --- drivers/block/null_blk/main.c | 18 +++++++++++++++--- drivers/block/null_blk/null_blk.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 344e3859bb1e..0d16bd6de5b7 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -156,6 +156,10 @@ static int g_max_sectors; module_param_named(max_sectors, g_max_sectors, int, 0444); MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)"); +static unsigned int g_max_segment_size = BLK_MAX_SEGMENT_SIZE; +module_param_named(max_segment_size, g_max_segment_size, int, 0444); +MODULE_PARM_DESC(max_segment_size, "Maximum size of a segment in bytes"); + static unsigned int nr_devices = 1; module_param(nr_devices, uint, 0444); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); @@ -351,6 +355,7 @@ NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); 
NULLB_DEVICE_ATTR(blocksize, uint, NULL); NULLB_DEVICE_ATTR(max_sectors, uint, NULL); +NULLB_DEVICE_ATTR(max_segment_size, uint, NULL); NULLB_DEVICE_ATTR(irqmode, uint, NULL); NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); NULLB_DEVICE_ATTR(index, uint, NULL); @@ -470,6 +475,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_queue_mode, &nullb_device_attr_blocksize, &nullb_device_attr_max_sectors, + &nullb_device_attr_max_segment_size, &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, @@ -541,7 +547,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n"); + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,max_segment_size,virt_boundary\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -597,6 +603,7 @@ static struct nullb_device *null_alloc_dev(void) dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; dev->max_sectors = g_max_sectors; + dev->max_segment_size = g_max_segment_size; dev->irqmode = g_irqmode; dev->hw_queue_depth = g_hw_queue_depth; dev->blocking = g_blocking; @@ -1135,6 +1142,8 @@ static int null_transfer(struct nullb *nullb, struct page *page, unsigned int valid_len = len; int err = 0; + WARN_ONCE(len > dev->max_segment_size, "%u > %u\n", len, + dev->max_segment_size); if (!is_write) { if (dev->zoned) valid_len = null_zone_valid_read_len(nullb, @@ -1170,7 +1179,8 @@ static int null_handle_rq(struct nullb_cmd *cmd) spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { - len = bvec.bv_len; + len = min(bvec.bv_len, nullb->dev->max_segment_size); + bvec.bv_len = len; err = 
null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(req_op(rq)), sector, rq->cmd_flags & REQ_FUA); @@ -1197,7 +1207,8 @@ static int null_handle_bio(struct nullb_cmd *cmd) spin_lock_irq(&nullb->lock); bio_for_each_segment(bvec, bio, iter) { - len = bvec.bv_len; + len = min(bvec.bv_len, nullb->dev->max_segment_size); + bvec.bv_len = len; err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(bio_op(bio)), sector, bio->bi_opf & REQ_FUA); @@ -1904,6 +1915,7 @@ static int null_add_dev(struct nullb_device *dev) dev->max_sectors = min_t(unsigned int, dev->max_sectors, BLK_DEF_MAX_SECTORS); blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); + blk_queue_max_segment_size(nullb->q, dev->max_segment_size); if (dev->virt_boundary) blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 277571c502d9..430b793c1575 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -88,6 +88,7 @@ struct nullb_device { unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */ unsigned int max_sectors; /* Max sectors per command */ + unsigned int max_segment_size; /* Max size of a single DMA segment. */ unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ From 288abb8b19f74fd2f200b5e65c401edbebbb41cb Mon Sep 17 00:00:00 2001 From: Sajid Dalvi Date: Tue, 20 Feb 2024 13:20:17 -0600 Subject: [PATCH 09/98] ANDROID: PCI: dwc: Wait for the link only if it has been started In dw_pcie_host_init() regardless of whether the link has been started or not, the code waits for the link to come up. Even in cases where start_link() is not defined the code ends up spinning in a loop for 1 second. 
Since in some systems dw_pcie_host_init() gets called during probe, this one second loop for each pcie interface instance ends up extending the boot time. Wait for the link up in only if the start_link() is defined. The patch submitted to the upstream kernel (see link below) was not accepted due to no upstream user. The change here is a simplified version of that patch, which will wait for a link only if start_link ops has been defined. Also, this patch was already applied before in https://r.android.com/2548250 but the functionality was lost after https://lore.kernel.org/all/20220624143428.8334-14-Sergey.Semin@baikalelectronics.ru/ was pulled in from the LTS merge. This patch restores the functionality (of removing the delay) which was lost during the LTS merge. Bug: 315052790 Link: https://lore.kernel.org/all/20240112093006.2832105-1-ajayagarwal@google.com/ Change-Id: I4e8d00f6195062728417e41ddd51072880676920 Signed-off-by: Sajid Dalvi --- drivers/pci/controller/dwc/pcie-designware-host.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index fe0fd88e95eb..f0967cbb48f2 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -411,8 +411,10 @@ int dw_pcie_host_init(struct pcie_port *pp) if (ret) goto err_free_msi; - /* Ignore errors, the link may come up later */ - dw_pcie_wait_for_link(pci); + if (pci->ops && pci->ops->start_link) { + /* Ignore errors, the link may come up later */ + dw_pcie_wait_for_link(pci); + } } bridge->sysdata = pp; From 4403e2517ad8b4738b8de62f94f8ef08cb093a8a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Feb 2024 18:49:51 +0100 Subject: [PATCH 10/98] UPSTREAM: netfilter: nft_set_rbtree: skip end interval element from gc commit 60c0c230c6f046da536d3df8b39a20b9a9fd6af0 upstream. 
rbtree lazy gc on insert might collect an end interval element that has been just added in this transactions, skip end interval elements that are not yet active. Bug: 325477234 Fixes: f718863aca46 ("netfilter: nft_set_rbtree: fix overlap expiration walk") Cc: stable@vger.kernel.org Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1296c110c5a0b45a8fcf58e7d18bc5da61a565cb) Signed-off-by: Lee Jones Change-Id: I42f7bca418d47948292b15ace9f371b81ccd7fe8 --- net/netfilter/nft_set_rbtree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a71e..5bf5572e945c 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -235,7 +235,7 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -254,7 +254,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -365,7 +365,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); From d96725ec1af6c6e58c981e45669160357967ff4c Mon Sep 17 00:00:00 2001 From: Vilas Bhat Date: Fri, 23 Feb 2024 12:27:51 -0800 
Subject: [PATCH 11/98] BACKPORT: FROMGIT: PM: runtime: add tracepoint for runtime_status changes Existing runtime PM ftrace events (`rpm_suspend`, `rpm_resume`, `rpm_return_int`) offer limited visibility into the exact timing of device runtime power state transitions, particularly when asynchronous operations are involved. When the `rpm_suspend` or `rpm_resume` functions are invoked with the `RPM_ASYNC` flag, a return value of 0 i.e., success merely indicates that the device power state request has been queued, not that the device has yet transitioned. A new ftrace event, `rpm_status`, is introduced. This event directly logs the `power.runtime_status` value of a device whenever it changes providing granular tracking of runtime power state transitions regardless of synchronous or asynchronous `rpm_suspend` / `rpm_resume` usage. Signed-off-by: Vilas Bhat Signed-off-by: Rafael J. Wysocki Bug: 325508361 (cherry picked from commit 015abee404760249a5c968b9ce29216b94b8ced1 https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next) [vilasbhat: Removed reference to RPM_INVALID from patch as it does not exist in 5.15] Change-Id: Iad7cae74c41b23b430331379c180b5e59bc32c40 Signed-off-by: Vilas Bhat --- drivers/base/power/runtime.c | 1 + include/trace/events/rpm.h | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 5824d41a0b74..c94699018b9d 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -93,6 +93,7 @@ static void update_pm_runtime_accounting(struct device *dev) static void __update_runtime_status(struct device *dev, enum rpm_status status) { update_pm_runtime_accounting(dev); + trace_rpm_status(dev, status); dev->power.runtime_status = status; } diff --git a/include/trace/events/rpm.h b/include/trace/events/rpm.h index 3c716214dab1..c5763327ab6c 100644 --- a/include/trace/events/rpm.h +++ b/include/trace/events/rpm.h @@ 
-101,6 +101,47 @@ TRACE_EVENT(rpm_return_int, __entry->ret) ); +#define RPM_STATUS_STRINGS \ + EM(RPM_ACTIVE, "RPM_ACTIVE") \ + EM(RPM_RESUMING, "RPM_RESUMING") \ + EM(RPM_SUSPENDED, "RPM_SUSPENDED") \ + EMe(RPM_SUSPENDING, "RPM_SUSPENDING") + +/* Enums require being exported to userspace, for user tool parsing. */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +RPM_STATUS_STRINGS + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(rpm_status, + TP_PROTO(struct device *dev, enum rpm_status status), + TP_ARGS(dev, status), + + TP_STRUCT__entry( + __string(name, dev_name(dev)) + __field(int, status) + ), + + TP_fast_assign( + __assign_str(name, dev_name(dev)); + __entry->status = status; + ), + + TP_printk("%s status=%s", __get_str(name), + __print_symbolic(__entry->status, RPM_STATUS_STRINGS)) +); + #endif /* _TRACE_RUNTIME_POWER_H */ /* This part must be outside protection */ From f115661832fb68e1b78a3f1be78de0927afe0a88 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Feb 2024 17:57:16 +0000 Subject: [PATCH 12/98] Revert "interconnect: Teach lockdep about icc_bw_lock order" This reverts commit e3a29b80e9e6df217dd61c670ac42864fa4a0e67 which is commit 13619170303878e1dae86d9a58b039475c957fcf upstream. It is reported to cause crashes, so revert it for now. 
Bug: 326555421 Signed-off-by: Greg Kroah-Hartman Change-Id: I3dd7744a9b706a959cbed3a793be668147dfcb9a --- drivers/interconnect/core.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index b7c41bd7409c..1ea0d43ca3ae 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1136,21 +1136,13 @@ void icc_sync_state(struct device *dev) } } } - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_sync_state); static int __init icc_init(void) { - struct device_node *root; - - /* Teach lockdep about lock ordering wrt. shrinker: */ - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&icc_bw_lock); - fs_reclaim_release(GFP_KERNEL); - - root = of_find_node_by_path("/"); + struct device_node *root = of_find_node_by_path("/"); providers_count = of_count_icc_providers(root); of_node_put(root); From b74b4cbe62eda8dc8bd844c0af41dddd3890aa4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Feb 2024 17:58:16 +0000 Subject: [PATCH 13/98] Revert "interconnect: Fix locking for runpm vs reclaim" This reverts commit 9be2957f014d91088db1eb5dd09d9a03d7184dce which is commit af42269c3523492d71ebbe11fefae2653e9cdc78 upstream. It is reported to cause crashes, so revert it for now. 
Bug: 326555421 Change-Id: I2fb3626c306e0444f4e0eb42a95488e688942ba9 Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/core.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1ea0d43ca3ae..f93678096326 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -30,7 +30,6 @@ static LIST_HEAD(icc_providers); static int providers_count; static bool synced_state; static DEFINE_MUTEX(icc_lock); -static DEFINE_MUTEX(icc_bw_lock); static struct dentry *icc_debugfs_dir; static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) @@ -634,7 +633,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) if (WARN_ON(IS_ERR(path) || !path->num_nodes)) return -EINVAL; - mutex_lock(&icc_bw_lock); + mutex_lock(&icc_lock); old_avg = path->reqs[0].avg_bw; old_peak = path->reqs[0].peak_bw; @@ -666,7 +665,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) apply_constraints(path); } - mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); trace_icc_set_bw_end(path, ret); @@ -969,7 +968,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); node->provider = provider; list_add_tail(&node->node_list, &provider->nodes); @@ -995,7 +993,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = 0; node->peak_bw = 0; - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_node_add); @@ -1123,7 +1120,6 @@ void icc_sync_state(struct device *dev) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); synced_state = true; list_for_each_entry(p, &icc_providers, provider_list) { dev_dbg(p->dev, "interconnect provider is in synced state\n"); From 9cef46f39ee92235f328de3bc65c1b765a200613 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 29 Feb 2024 22:35:14 +0000 Subject: [PATCH 14/98] ANDROID: remove 
LTO check from build.config.gki.aarch64.fips140 Don't check the "LTO" variable in build.config.gki.aarch64.fips140, since fips140.ko no longer depends on LTO. Also, Kleaf doesn't set the "LTO" variable anyway; it was specific to build.sh. Bug: 188620248 Change-Id: I213faa4c8c1a23898c08da121b0a5dc602b7218a Signed-off-by: Eric Biggers --- build.config.gki.aarch64.fips140 | 5 ----- 1 file changed, 5 deletions(-) diff --git a/build.config.gki.aarch64.fips140 b/build.config.gki.aarch64.fips140 index ec493efc20cf..031695579fc8 100644 --- a/build.config.gki.aarch64.fips140 +++ b/build.config.gki.aarch64.fips140 @@ -10,11 +10,6 @@ MAKE_GOALS=" modules " -if [ "${LTO}" = "none" ]; then - echo "The FIPS140 module needs LTO to be enabled." - exit 1 -fi - MODULES_ORDER=android/gki_aarch64_fips140_modules KERNEL_DIR=common From ee9964b308a6ee11d1cf5ee778dc91030f084489 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Tue, 16 Jan 2024 20:20:25 +0000 Subject: [PATCH 15/98] ANDROID: userfaultfd: allow SPF for UFFD_FEATURE_SIGBUS on private+anon Currently we bail out of speculative page fault when we detect that the fault address is in a userfaultfd registered vma. However, if userfaultfd is being used with UFFD_FEATURE_SIGBUS feature, then handle_userfault() doesn't do much and is easiest to handle with SPF. This patch lets MISSING userfaultfs on private anonymous mappings be allowed with SPF if UFFD_FEATURE_SIGBUS is used. With this patch we get >99% success rate for userfaults caused during userfaultfd GC's compaction phase. This translates into eliminating uninterruptible sleep time in do_page_fault() due to userfaults. ABI breakage note: 'userfaultfd_ctx' struct, which has been modified in this CL, is private and hence cannot cause real breakage. 
Bug: 324640390 Bug: 320478828 Signed-off-by: Lokesh Gidra Change-Id: Ic7fde0fde03602b35179bc0cf891ddbbc434190f --- fs/userfaultfd.c | 96 ++++++++++++++++++++++++++--------- include/linux/mm_types.h | 2 +- include/linux/userfaultfd_k.h | 12 ++++- mm/memory.c | 21 +++++++- mm/userfaultfd.c | 2 +- 5 files changed, 104 insertions(+), 29 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 232861e8aad6..b4c24753ec19 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -71,6 +71,7 @@ struct userfaultfd_ctx { atomic_t mmap_changing; /* mm with one ore more vmas attached to this userfaultfd_ctx */ struct mm_struct *mm; + struct rcu_head rcu_head; }; struct userfaultfd_fork_ctx { @@ -156,6 +157,13 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) refcount_inc(&ctx->refcount); } +static void __free_userfaultfd_ctx(struct rcu_head *head) +{ + struct userfaultfd_ctx *ctx = container_of(head, struct userfaultfd_ctx, + rcu_head); + kmem_cache_free(userfaultfd_ctx_cachep, ctx); +} + /** * userfaultfd_ctx_put - Releases a reference to the internal userfaultfd * context. @@ -176,7 +184,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock)); VM_BUG_ON(waitqueue_active(&ctx->fd_wqh)); mmdrop(ctx->mm); - kmem_cache_free(userfaultfd_ctx_cachep, ctx); + call_rcu(&ctx->rcu_head, __free_userfaultfd_ctx); } } @@ -350,6 +358,24 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags) return TASK_UNINTERRUPTIBLE; } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +bool userfaultfd_using_sigbus(struct vm_area_struct *vma) +{ + struct userfaultfd_ctx *ctx; + bool ret; + + /* + * Do it inside RCU section to ensure that the ctx doesn't + * disappear under us. 
+ */ + rcu_read_lock(); + ctx = rcu_dereference(vma->vm_userfaultfd_ctx.ctx); + ret = ctx && (ctx->features & UFFD_FEATURE_SIGBUS); + rcu_read_unlock(); + return ret; +} +#endif + /* * The locking rules involved in returning VM_FAULT_RETRY depending on * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and @@ -394,7 +420,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) */ mmap_assert_locked(mm); - ctx = vmf->vma->vm_userfaultfd_ctx.ctx; + ctx = rcu_dereference_protected(vmf->vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); if (!ctx) goto out; @@ -611,8 +638,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ mmap_write_lock(mm); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + if (rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) == + release_new_ctx) { + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, + NULL); vma->vm_flags &= ~__VM_UFFD_FLAGS; } mmap_write_unlock(mm); @@ -643,9 +672,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) struct userfaultfd_ctx *ctx = NULL, *octx; struct userfaultfd_fork_ctx *fctx; - octx = vma->vm_userfaultfd_ctx.ctx; + octx = rcu_dereference_protected( + vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&vma->vm_mm->mmap_lock)); + if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); vma->vm_flags &= ~__VM_UFFD_FLAGS; return 0; } @@ -682,7 +714,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) list_add_tail(&fctx->list, fcs); } - vma->vm_userfaultfd_ctx.ctx = ctx; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx); return 0; } @@ -715,7 +747,8 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, { struct userfaultfd_ctx *ctx; - ctx = 
vma->vm_userfaultfd_ctx.ctx; + ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&vma->vm_mm->mmap_lock)); if (!ctx) return; @@ -726,7 +759,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, atomic_inc(&ctx->mmap_changing); } else { /* Drop uffd context if remap feature not enabled */ - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); vma->vm_flags &= ~__VM_UFFD_FLAGS; } } @@ -763,7 +796,8 @@ bool userfaultfd_remove(struct vm_area_struct *vma, struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue ewq; - ctx = vma->vm_userfaultfd_ctx.ctx; + ctx = rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) return true; @@ -801,7 +835,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, { for ( ; vma && vma->vm_start < end; vma = vma->vm_next) { struct userfaultfd_unmap_ctx *unmap_ctx; - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; + struct userfaultfd_ctx *ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&vma->vm_mm->mmap_lock)); if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || has_unmap_ctx(ctx, unmaps, start, end)) @@ -866,10 +902,13 @@ static int userfaultfd_release(struct inode *inode, struct file *file) mmap_write_lock(mm); prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); - BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^ + BUG_ON(!!cur_uffd_ctx ^ !!(vma->vm_flags & __VM_UFFD_FLAGS)); - if (vma->vm_userfaultfd_ctx.ctx != ctx) { + if (cur_uffd_ctx != ctx) { prev = vma; continue; } @@ -884,7 +923,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) else prev = vma; vma->vm_flags = new_flags; - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + 
rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); } mmap_write_unlock(mm); mmput(mm); @@ -1350,9 +1389,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, found = false; basic_ioctls = false; for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(cur->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); - BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ + BUG_ON(!!cur_uffd_ctx ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* check not compatible vmas */ @@ -1395,8 +1437,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * wouldn't know which one to deliver the userfaults to. */ ret = -EBUSY; - if (cur->vm_userfaultfd_ctx.ctx && - cur->vm_userfaultfd_ctx.ctx != ctx) + if (cur_uffd_ctx && cur_uffd_ctx != ctx) goto out_unlock; /* @@ -1414,18 +1455,20 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ret = 0; do { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); BUG_ON(!vma_can_userfault(vma, vm_flags)); - BUG_ON(vma->vm_userfaultfd_ctx.ctx && - vma->vm_userfaultfd_ctx.ctx != ctx); + BUG_ON(cur_uffd_ctx && cur_uffd_ctx != ctx); WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this * userfaultfd and with the right tracking mode too. */ - if (vma->vm_userfaultfd_ctx.ctx == ctx && + if (cur_uffd_ctx == ctx && (vma->vm_flags & vm_flags) == vm_flags) goto skip; @@ -1460,7 +1503,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the current one has not been updated yet. 
*/ vma->vm_flags = new_flags; - vma->vm_userfaultfd_ctx.ctx = ctx; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, ctx); if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) hugetlb_unshare_all_pmds(vma); @@ -1559,7 +1602,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { cond_resched(); - BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ + BUG_ON(!!rcu_access_pointer(cur->vm_userfaultfd_ctx.ctx) ^ !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* @@ -1581,6 +1624,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, ret = 0; do { + struct userfaultfd_ctx *cur_uffd_ctx = + rcu_dereference_protected(vma->vm_userfaultfd_ctx.ctx, + lockdep_is_held(&mm->mmap_lock)); cond_resched(); BUG_ON(!vma_can_userfault(vma, vma->vm_flags)); @@ -1589,7 +1635,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * Nothing to do: this vma is already registered into this * userfaultfd and with the right tracking mode too. */ - if (!vma->vm_userfaultfd_ctx.ctx) + if (!cur_uffd_ctx) goto skip; WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); @@ -1608,7 +1654,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range range; range.start = start; range.len = vma_end - start; - wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); + wake_userfault(cur_uffd_ctx, &range); } new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; @@ -1637,7 +1683,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the current one has not been updated yet. 
*/ vma->vm_flags = new_flags; - vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + rcu_assign_pointer(vma->vm_userfaultfd_ctx.ctx, NULL); skip: prev = vma; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 42786e6364ef..076bb5eb99f2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -311,7 +311,7 @@ struct vm_region { #ifdef CONFIG_USERFAULTFD #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) struct vm_userfaultfd_ctx { - struct userfaultfd_ctx *ctx; + struct userfaultfd_ctx __rcu *ctx; }; #else /* CONFIG_USERFAULTFD */ #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 33cea484d1ad..8ea2827a4eba 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,6 +36,9 @@ extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +extern bool userfaultfd_using_sigbus(struct vm_area_struct *vma); +#endif /* * The mode of operation for __mcopy_atomic and its helpers. 
@@ -75,7 +78,7 @@ extern int mwriteprotect_range(struct mm_struct *dst_mm, static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { - return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; + return rcu_access_pointer(vma->vm_userfaultfd_ctx.ctx) == vm_ctx.ctx; } /* @@ -154,6 +157,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, return VM_FAULT_SIGBUS; } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +static inline bool userfaultfd_using_sigbus(struct vm_area_struct *vma) +{ + return false; +} +#endif + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { diff --git a/mm/memory.c b/mm/memory.c index 54e45571139e..8b10ef4c5d9d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4939,6 +4939,17 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, pgd_t pgdval; p4d_t p4dval; pud_t pudval; + bool uffd_missing_sigbus = false; + +#ifdef CONFIG_USERFAULTFD + /* + * Only support SPF for SIGBUS+MISSING userfaults in private + * anonymous VMAs. + */ + uffd_missing_sigbus = vma_is_anonymous(vma) && + (vma->vm_flags & VM_UFFD_MISSING) && + userfaultfd_using_sigbus(vma); +#endif vmf.seq = seq; @@ -5018,11 +5029,19 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, speculative_page_walk_end(); + if (!vmf.pte && uffd_missing_sigbus) + return VM_FAULT_SIGBUS; + return handle_pte_fault(&vmf); spf_fail: speculative_page_walk_end(); - return VM_FAULT_RETRY; + /* + * Failing page-table walk is similar to page-missing so give an + * opportunity to SIGBUS+MISSING userfault to handle it before + * retrying with mmap_lock + */ + return uffd_missing_sigbus ? 
VM_FAULT_SIGBUS : VM_FAULT_RETRY; } #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index caa13abe0c56..d4175821dd29 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -42,7 +42,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm, * enforce the VM_MAYWRITE check done at uffd registration * time. */ - if (!dst_vma->vm_userfaultfd_ctx.ctx) + if (!rcu_access_pointer(dst_vma->vm_userfaultfd_ctx.ctx)) return NULL; return dst_vma; From 0d0784d6b2c1c583844613c04b8baa8e82f29c9b Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Fri, 9 Feb 2024 23:16:00 +0000 Subject: [PATCH 16/98] ANDROID: Update ABI for userfaultfd_ctx The struct is not public so shouldn't cause real ABI breakage. Bug: 320478828 Bug: 324640390 Change-Id: I724ca4c00bae09bc311d6495383cfd3a77592d7a Signed-off-by: Lokesh Gidra --- android/abi_gki_aarch64.stg | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 1ad3e32c0402..7b680d9ccfa3 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -52829,6 +52829,12 @@ member { type_id: 0x18bd6530 offset: 512 } +member { + id: 0x56a72143 + name: "callback_head" + type_id: 0xe3222f5b + offset: 1024 +} member { id: 0x56a7224a name: "callback_head" @@ -241566,7 +241572,7 @@ struct_union { kind: STRUCT name: "userfaultfd_ctx" definition { - bytesize: 128 + bytesize: 144 member_id: 0x3162bad8 member_id: 0x05e73814 member_id: 0xc409a485 @@ -241578,6 +241584,7 @@ struct_union { member_id: 0x79d263fe member_id: 0xdd180b6b member_id: 0x3025dd18 + member_id: 0x56a72143 } } struct_union { From 2390d5886287ebbf12d7735d4e5ef2fc4a6f2126 Mon Sep 17 00:00:00 2001 From: Ben Fennema Date: Tue, 5 Mar 2024 16:18:31 -0800 Subject: [PATCH 17/98] ANDROID: GKI: Update the ABI symbol list Update the pixel_watch symbol list. 
3 function symbol(s) added 'void gic_resume()' 'ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device*, u8, const void*, size_t)' 'int snd_soc_get_dai_name(const struct of_phandle_args*, const char**)' Bug: 327650099 Change-Id: I7a7efaa91f1a37f44d3e950af4ec9947fb349acc Signed-off-by: Ben Fennema --- android/abi_gki_aarch64.stg | 44 +++++++++++++++++++++++++++++ android/abi_gki_aarch64_pixel_watch | 16 +++++++++++ 2 files changed, 60 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 7b680d9ccfa3..68e2b49e33af 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -265963,6 +265963,14 @@ function { parameter_id: 0x11cffa09 parameter_id: 0x4585663f } +function { + id: 0x16019d86 + return_type_id: 0xd5cc9c9a + parameter_id: 0x09626b7f + parameter_id: 0x295c7202 + parameter_id: 0x391f15ea + parameter_id: 0xf435685e +} function { id: 0x1603f977 return_type_id: 0x48b5725f @@ -280952,6 +280960,12 @@ function { parameter_id: 0x391f15ea parameter_id: 0xf435685e } +function { + id: 0x9294d8c1 + return_type_id: 0x6720d32f + parameter_id: 0x3c01aef6 + parameter_id: 0x051414e1 +} function { id: 0x929694e1 return_type_id: 0x6720d32f @@ -329598,6 +329612,15 @@ elf_symbol { type_id: 0x8e47c273 full_name: "gic_nonsecure_priorities" } +elf_symbol { + id: 0x5ddec58b + name: "gic_resume" + is_defined: true + symbol_type: FUNCTION + crc: 0x14da596c + type_id: 0x10985193 + full_name: "gic_resume" +} elf_symbol { id: 0x596931c4 name: "gic_v3_cpu_init" @@ -336689,6 +336712,15 @@ elf_symbol { type_id: 0x9fd879f0 full_name: "mipi_dsi_dcs_set_tear_off" } +elf_symbol { + id: 0x0d70d901 + name: "mipi_dsi_dcs_write" + is_defined: true + symbol_type: FUNCTION + crc: 0xb1d4f105 + type_id: 0x16019d86 + full_name: "mipi_dsi_dcs_write" +} elf_symbol { id: 0x9d1a3913 name: "mipi_dsi_dcs_write_buffer" @@ -351577,6 +351609,15 @@ elf_symbol { type_id: 0x6eef99a2 full_name: "snd_soc_find_dai_with_mutex" } +elf_symbol { + id: 0x4086fab0 + name: 
"snd_soc_get_dai_name" + is_defined: true + symbol_type: FUNCTION + crc: 0x0082f713 + type_id: 0x9294d8c1 + full_name: "snd_soc_get_dai_name" +} elf_symbol { id: 0x33a917a0 name: "snd_soc_get_enum_double" @@ -365140,6 +365181,7 @@ interface { symbol_id: 0xe87161bc symbol_id: 0x112db471 symbol_id: 0x5a582da8 + symbol_id: 0x5ddec58b symbol_id: 0x596931c4 symbol_id: 0x390427e4 symbol_id: 0x53ba530f @@ -365926,6 +365968,7 @@ interface { symbol_id: 0x6ee51cd8 symbol_id: 0xf096de3c symbol_id: 0x4512217f + symbol_id: 0x0d70d901 symbol_id: 0x9d1a3913 symbol_id: 0xfdf03f19 symbol_id: 0xb0f04be5 @@ -367579,6 +367622,7 @@ interface { symbol_id: 0x687a68d3 symbol_id: 0x97843792 symbol_id: 0xc88e38bb + symbol_id: 0x4086fab0 symbol_id: 0x33a917a0 symbol_id: 0x4c3ba1e9 symbol_id: 0x05ad614a diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index 8455f206c1c5..f60a3092e140 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -844,6 +844,10 @@ get_user_pages get_zeroed_page gic_nonsecure_priorities + gic_resume + gic_v3_cpu_init + gic_v3_dist_init + gic_v3_dist_wait_for_rwp gov_attr_set_init gov_attr_set_put governor_sysfs_ops @@ -1220,6 +1224,7 @@ mipi_dsi_create_packet mipi_dsi_dcs_set_display_brightness mipi_dsi_dcs_set_tear_off + mipi_dsi_dcs_write mipi_dsi_host_register mipi_dsi_host_unregister misc_deregister @@ -1333,6 +1338,7 @@ ns_capable nsecs_to_jiffies ns_to_timespec64 + __num_online_cpus nvmem_cell_get nvmem_cell_put nvmem_cell_read @@ -1734,6 +1740,7 @@ rpmsg_get_signals rpmsg_poll rpmsg_register_device + rpmsg_register_device_override rpmsg_rx_done rpmsg_send rpmsg_set_signals @@ -1939,6 +1946,7 @@ snd_soc_dapm_put_enum_double snd_soc_dapm_put_volsw snd_soc_dapm_sync + snd_soc_get_dai_name snd_soc_get_enum_double snd_soc_get_pcm_runtime snd_soc_get_volsw @@ -2167,12 +2175,16 @@ __traceiter_android_vh_binder_wakeup_ilocked __traceiter_android_vh_cpu_idle_enter 
__traceiter_android_vh_cpu_idle_exit + __traceiter_android_vh_cpuidle_psci_enter + __traceiter_android_vh_cpuidle_psci_exit __traceiter_android_vh_disable_thermal_cooling_stats __traceiter_android_vh_ftrace_dump_buffer __traceiter_android_vh_ftrace_format_check __traceiter_android_vh_ftrace_oops_enter __traceiter_android_vh_ftrace_oops_exit __traceiter_android_vh_ftrace_size_check + __traceiter_android_vh_gic_resume + __traceiter_android_vh_gic_v3_suspend __traceiter_android_vh_ipi_stop __traceiter_android_vh_jiffies_update __traceiter_android_vh_mmc_sdio_pm_flag_set @@ -2242,12 +2254,16 @@ __tracepoint_android_vh_binder_wakeup_ilocked __tracepoint_android_vh_cpu_idle_enter __tracepoint_android_vh_cpu_idle_exit + __tracepoint_android_vh_cpuidle_psci_enter + __tracepoint_android_vh_cpuidle_psci_exit __tracepoint_android_vh_disable_thermal_cooling_stats __tracepoint_android_vh_ftrace_dump_buffer __tracepoint_android_vh_ftrace_format_check __tracepoint_android_vh_ftrace_oops_enter __tracepoint_android_vh_ftrace_oops_exit __tracepoint_android_vh_ftrace_size_check + __tracepoint_android_vh_gic_resume + __tracepoint_android_vh_gic_v3_suspend __tracepoint_android_vh_ipi_stop __tracepoint_android_vh_jiffies_update __tracepoint_android_vh_mmc_sdio_pm_flag_set From 282bfc6c30c060ffe4cf34052aa90d43d6bc07ae Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 27 Feb 2024 21:50:35 +0000 Subject: [PATCH 18/98] UPSTREAM: Revert "usb: typec: class: fix typec_altmode_put_partner to put plugs" This reverts commit b17b7fe6dd5c6ff74b38b0758ca799cdbb79e26e. That commit messed up the reference counting, so it needs to be rethought. 
Fixes: b17b7fe6dd5c ("usb: typec: class: fix typec_altmode_put_partner to put plugs") Cc: stable@vger.kernel.org Cc: RD Babiera Reported-by: Chris Bainbridge Closes: https://lore.kernel.org/lkml/CAP-bSRb3SXpgo_BEdqZB-p1K5625fMegRZ17ZkPE1J8ZYgEHDg@mail.gmail.com/ Signed-off-by: Heikki Krogerus Bug: 324496488 (cherry picked from commit 9c6b789e954fae73c548f39332bcc56bdf0d4373) Signed-off-by: RD Babiera (cherry picked from https://android-review.googlesource.com/q/commit:27167a6e39a6894d905ea97aece1aa9f0120f452) Merged-In: I2755a5e44dd1970d60e5d996dd7fc6d88f79684a Change-Id: I2755a5e44dd1970d60e5d996dd7fc6d88f79684a --- drivers/usb/typec/class.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ea86195c75b8..339752fef65e 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -265,7 +265,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) if (!partner) return; - adev = &altmode->adev; + adev = &partner->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -495,8 +495,7 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - if (!is_typec_port(dev->parent)) - typec_altmode_put_partner(alt); + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); From c3b70e94f10cbac844ae01538d785d971476ef1a Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 27 Feb 2024 21:59:34 +0000 Subject: [PATCH 19/98] UPSTREAM: usb: typec: class: fix typec_altmode_put_partner to put plugs usb: typec: class: fix typec_altmode_put_partner to put plugs When typec_altmode_put_partner is called by a plug altmode upon release, the port altmode the plug belongs to will not remove its reference to the plug. 
The check to see if the altmode being released is a plug evaluates against the released altmode's partner instead of the calling altmode, so change adev in typec_altmode_put_partner to properly refer to the altmode being released. Because typec_altmode_set_partner calls get_device() on the port altmode, add partner_adev that points to the port altmode in typec_put_partner to call put_device() on. typec_altmode_set_partner is not called for port altmodes, so add a check in typec_altmode_release to prevent typec_altmode_put_partner() calls on port altmode release. Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Cc: Co-developed-by: Christian A. Ehrhardt Signed-off-by: Christian A. Ehrhardt Signed-off-by: RD Babiera Tested-by: Christian A. Ehrhardt Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240103181754.2492492-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman Bug: 324496488 (cherry picked from commit 5962ded777d689cd8bf04454273e32228d7fb71f) Signed-off-by: RD Babiera (cherry picked from https://android-review.googlesource.com/q/commit:fe6e5059f610845f3373a734d228148a660ca44e) Merged-In: I688a333753b2e8069c0437a6d9dea30bf029c92c Change-Id: I688a333753b2e8069c0437a6d9dea30bf029c92c --- drivers/usb/typec/class.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 339752fef65e..173d86d120da 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -261,11 +261,13 @@ static void typec_altmode_put_partner(struct altmode *altmode) { struct altmode *partner = altmode->partner; struct typec_altmode *adev; + struct typec_altmode *partner_adev; if (!partner) return; - adev = &partner->adev; + adev = &altmode->adev; + partner_adev = &partner->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -274,7 +276,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) } else { 
partner->partner = NULL; } - put_device(&adev->dev); + put_device(&partner_adev->dev); } /** @@ -495,7 +497,8 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - typec_altmode_put_partner(alt); + if (!is_typec_port(dev->parent)) + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); From 5aed5c34359c94f9dd1d5c815485cf8bdbb8a645 Mon Sep 17 00:00:00 2001 From: lipeifeng Date: Tue, 20 Feb 2024 19:01:27 +0800 Subject: [PATCH 20/98] ANDROID: uid_sys_stat: fix data-error of cputime and io 'commit b6115e14010 ("ANDROID: uid_sys_stat: split the global lock uid_lock to the fine-grained locks for each hlist in hash_table.")' The above patch split the global lock to per-uid lock to reduce lock competition. But result in data-error from uid_cputime_show and uid_io_show in some cases. E.g, if thread1 and thread2 read /proc/uid_cputime/show_uid_stat at the same time, thread2 maybe operate in partA and zero active_stime and active_utime of uid_entry when thread1 is between partB and partC, which would cause thread1 show the error data. static int uid_cputime_show(struct seq_file *m, void *v) { ... /*partA*/ for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { uid_entry->active_stime = 0; uid_entry->active_utime = 0; } unlock_uid_by_bkt(bkt); } rcu_read_lock(); /* partB */ do_each_thread(temp, task) { ... 
lock_uid(uid); if (!(task->flags & PF_EXITING)) { task_cputime_adjusted(task, &utime, &stime); uid_entry->active_utime += utime; uid_entry->active_stime += stime; } unlock_uid(uid); } while_each_thread(temp, task); rcu_read_unlock(); for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { u64 total_utime = uid_entry->utime + uid_entry->active_utime; u64 total_stime = uid_entry->stime + uid_entry->active_stime; /* partC */ seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, ktime_to_us(total_utime), ktime_to_us(total_stime)); } unlock_uid_by_bkt(bkt); } The patch ensures that the calculation and seq_printf of each uid_entry is within the uid_lock range, in order to accurate data. Bug: 278138377 Change-Id: Iaa2ccd95c4b4b333f04b2ba18d7699d94017394e Signed-off-by: lipeifeng (cherry picked from commit ea35d2bd073214e84be242287a2e91741c6588ed) --- drivers/misc/uid_sys_stats.c | 216 ++++++++++++----------------------- 1 file changed, 72 insertions(+), 144 deletions(-) diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index ff6bc1d6fc45..a78be7fb05ff 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -51,12 +51,9 @@ struct io_stats { #define UID_STATE_FOREGROUND 0 #define UID_STATE_BACKGROUND 1 -#define UID_STATE_BUCKET_SIZE 2 - -#define UID_STATE_TOTAL_CURR 2 -#define UID_STATE_TOTAL_LAST 3 -#define UID_STATE_DEAD_TASKS 4 -#define UID_STATE_SIZE 5 +#define UID_STATE_TOTAL_LAST 2 +#define UID_STATE_DEAD_TASKS 3 +#define UID_STATE_SIZE 4 #define MAX_TASK_COMM_LEN 256 @@ -71,8 +68,6 @@ struct uid_entry { uid_t uid; u64 utime; u64 stime; - u64 active_utime; - u64 active_stime; int state; struct io_stats io[UID_STATE_SIZE]; struct hlist_node hash; @@ -173,58 +168,47 @@ static struct uid_entry *find_or_register_uid(uid_t uid) return uid_entry; } +static void calc_uid_cputime(struct uid_entry *uid_entry, + u64 
*total_utime, u64 *total_stime) +{ + struct user_namespace *user_ns = current_user_ns(); + struct task_struct *p, *t; + u64 utime, stime; + uid_t uid; + + rcu_read_lock(); + for_each_process(p) { + uid = from_kuid_munged(user_ns, task_uid(p)); + + if (uid != uid_entry->uid) + continue; + + for_each_thread(p, t) { + /* avoid double accounting of dying threads */ + if (!(t->flags & PF_EXITING)) { + task_cputime_adjusted(t, &utime, &stime); + *total_utime += utime; + *total_stime += stime; + } + } + } + rcu_read_unlock(); +} + static int uid_cputime_show(struct seq_file *m, void *v) { struct uid_entry *uid_entry = NULL; - struct task_struct *task, *temp; - struct user_namespace *user_ns = current_user_ns(); - u64 utime; - u64 stime; u32 bkt; - uid_t uid; for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { + lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - uid_entry->active_stime = 0; - uid_entry->active_utime = 0; - } - unlock_uid_by_bkt(bkt); - } + u64 total_utime = uid_entry->utime; + u64 total_stime = uid_entry->stime; - rcu_read_lock(); - do_each_thread(temp, task) { - uid = from_kuid_munged(user_ns, task_uid(task)); - lock_uid(uid); - - if (!uid_entry || uid_entry->uid != uid) - uid_entry = find_or_register_uid(uid); - if (!uid_entry) { - rcu_read_unlock(); - unlock_uid(uid); - pr_err("%s: failed to find the uid_entry for uid %d\n", - __func__, uid); - return -ENOMEM; - } - /* avoid double accounting of dying threads */ - if (!(task->flags & PF_EXITING)) { - task_cputime_adjusted(task, &utime, &stime); - uid_entry->active_utime += utime; - uid_entry->active_stime += stime; - } - unlock_uid(uid); - } while_each_thread(temp, task); - rcu_read_unlock(); - - for (bkt = 0, uid_entry = NULL; uid_entry == NULL && - bkt < HASH_SIZE(hash_table); bkt++) { - lock_uid_by_bkt(bkt); - hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - u64 total_utime = uid_entry->utime + - uid_entry->active_utime; 
- u64 total_stime = uid_entry->stime + - uid_entry->active_stime; + calc_uid_cputime(uid_entry, &total_utime, &total_stime); seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, ktime_to_us(total_utime), ktime_to_us(total_stime)); } @@ -323,86 +307,52 @@ static void add_uid_io_stats(struct uid_entry *uid_entry, __add_uid_io_stats(uid_entry, &task->ioac, slot); } -static void update_io_stats_all(void) +static void update_io_stats_uid(struct uid_entry *uid_entry) { - struct uid_entry *uid_entry = NULL; - struct task_struct *task, *temp; struct user_namespace *user_ns = current_user_ns(); + struct task_struct *p, *t; + struct io_stats io; + + memset(&io, 0, sizeof(struct io_stats)); + + rcu_read_lock(); + for_each_process(p) { + uid_t uid = from_kuid_munged(user_ns, task_uid(p)); + + if (uid != uid_entry->uid) + continue; + + for_each_thread(p, t) { + /* avoid double accounting of dying threads */ + if (!(t->flags & PF_EXITING)) { + io.read_bytes += t->ioac.read_bytes; + io.write_bytes += compute_write_bytes(&t->ioac); + io.rchar += t->ioac.rchar; + io.wchar += t->ioac.wchar; + io.fsync += t->ioac.syscfs; + } + } + } + rcu_read_unlock(); + + compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io, + &uid_entry->io[UID_STATE_TOTAL_LAST], + &uid_entry->io[UID_STATE_DEAD_TASKS]); +} + +static int uid_io_show(struct seq_file *m, void *v) +{ + + struct uid_entry *uid_entry = NULL; u32 bkt; - uid_t uid; for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); bkt++) { lock_uid_by_bkt(bkt); hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, - sizeof(struct io_stats)); - } - unlock_uid_by_bkt(bkt); - } - rcu_read_lock(); - do_each_thread(temp, task) { - uid = from_kuid_munged(user_ns, task_uid(task)); - lock_uid(uid); - if (!uid_entry || uid_entry->uid != uid) - uid_entry = find_or_register_uid(uid); - if (!uid_entry) { - unlock_uid(uid); - continue; - } - add_uid_io_stats(uid_entry, task, 
UID_STATE_TOTAL_CURR); - unlock_uid(uid); - } while_each_thread(temp, task); - rcu_read_unlock(); + update_io_stats_uid(uid_entry); - for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); - bkt++) { - lock_uid_by_bkt(bkt); - hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { - compute_io_bucket_stats(&uid_entry->io[uid_entry->state], - &uid_entry->io[UID_STATE_TOTAL_CURR], - &uid_entry->io[UID_STATE_TOTAL_LAST], - &uid_entry->io[UID_STATE_DEAD_TASKS]); - } - unlock_uid_by_bkt(bkt); - } -} - -static void update_io_stats_uid(struct uid_entry *uid_entry) -{ - struct task_struct *task, *temp; - struct user_namespace *user_ns = current_user_ns(); - - memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, - sizeof(struct io_stats)); - - rcu_read_lock(); - do_each_thread(temp, task) { - if (from_kuid_munged(user_ns, task_uid(task)) != uid_entry->uid) - continue; - add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR); - } while_each_thread(temp, task); - rcu_read_unlock(); - - compute_io_bucket_stats(&uid_entry->io[uid_entry->state], - &uid_entry->io[UID_STATE_TOTAL_CURR], - &uid_entry->io[UID_STATE_TOTAL_LAST], - &uid_entry->io[UID_STATE_DEAD_TASKS]); -} - - -static int uid_io_show(struct seq_file *m, void *v) -{ - struct uid_entry *uid_entry; - u32 bkt; - - update_io_stats_all(); - for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table); - bkt++) { - - lock_uid_by_bkt(bkt); - hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) { seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", uid_entry->uid, uid_entry->io[UID_STATE_FOREGROUND].rchar, @@ -446,7 +396,6 @@ static ssize_t uid_procstat_write(struct file *file, uid_t uid; int argc, state; char input[128]; - struct uid_entry uid_entry_tmp; if (count >= sizeof(input)) return -EINVAL; @@ -475,29 +424,8 @@ static ssize_t uid_procstat_write(struct file *file, return count; } - /* - * Update_io_stats_uid_locked would take a long lock-time of uid_lock 
- * due to call do_each_thread to compute uid_entry->io, which would - * cause to lock competition sometime. - * - * Using uid_entry_tmp to get the result of Update_io_stats_uid, - * so that we can unlock_uid during update_io_stats_uid, in order - * to avoid the unnecessary lock-time of uid_lock. - */ - uid_entry_tmp = *uid_entry; - - unlock_uid(uid); - update_io_stats_uid(&uid_entry_tmp); - - lock_uid(uid); - hlist_for_each_entry(uid_entry, &hash_table[hash_min(uid, HASH_BITS(hash_table))], hash) { - if (uid_entry->uid == uid_entry_tmp.uid) { - memcpy(uid_entry->io, uid_entry_tmp.io, - sizeof(struct io_stats) * UID_STATE_SIZE); - uid_entry->state = state; - break; - } - } + update_io_stats_uid(uid_entry); + uid_entry->state = state; unlock_uid(uid); return count; From ac4797cea53888fe6f0df21c746bb676e360545a Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Thu, 7 Mar 2024 22:09:02 +0000 Subject: [PATCH 21/98] UPSTREAM: usb: typec: altmodes/displayport: create sysfs nodes as driver's default device attribute group The DisplayPort driver's sysfs nodes may be present to the userspace before typec_altmode_set_drvdata() completes in dp_altmode_probe. This means that a sysfs read can trigger a NULL pointer error by deferencing dp->hpd in hpd_show or dp->lock in pin_assignment_show, as dev_get_drvdata() returns NULL in those cases. Remove manual sysfs node creation in favor of adding attribute group as default for devices bound to the driver. The ATTRIBUTE_GROUPS() macro is not used here otherwise the path to the sysfs nodes is no longer compliant with the ABI. 
Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Link: https://lore.kernel.org/r/20240229001101.3889432-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman Bug: 313517804 (cherry picked from commit 165376f6b23e9a779850e750fb2eb06622e5a531) Change-Id: I13aaa956ff2b37e29eb0d90bbac0b7e1ac969b80 Signed-off-by: RD Babiera --- drivers/usb/typec/altmodes/displayport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 8ebcedb682af..213651da2aab 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -546,23 +546,27 @@ static ssize_t hpd_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(hpd); -static struct attribute *dp_altmode_attrs[] = { +static struct attribute *displayport_attrs[] = { &dev_attr_configuration.attr, &dev_attr_pin_assignment.attr, &dev_attr_hpd.attr, NULL }; -static const struct attribute_group dp_altmode_group = { +static const struct attribute_group displayport_group = { .name = "displayport", - .attrs = dp_altmode_attrs, + .attrs = displayport_attrs, +}; + +static const struct attribute_group *displayport_groups[] = { + &displayport_group, + NULL, }; int dp_altmode_probe(struct typec_altmode *alt) { const struct typec_altmode *port = typec_altmode_get_partner(alt); struct dp_altmode *dp; - int ret; /* FIXME: Port can only be DFP_U. 
*/ @@ -573,10 +577,6 @@ int dp_altmode_probe(struct typec_altmode *alt) DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo))) return -ENODEV; - ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group); - if (ret) - return ret; - dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL); if (!dp) return -ENOMEM; @@ -602,7 +602,6 @@ void dp_altmode_remove(struct typec_altmode *alt) { struct dp_altmode *dp = typec_altmode_get_drvdata(alt); - sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group); cancel_work_sync(&dp->work); } EXPORT_SYMBOL_GPL(dp_altmode_remove); @@ -620,6 +619,7 @@ static struct typec_altmode_driver dp_altmode_driver = { .driver = { .name = "typec_displayport", .owner = THIS_MODULE, + .dev_groups = displayport_groups, }, }; module_typec_altmode_driver(dp_altmode_driver); From 37b83a89de429af428fc1c8cfb34c617013be894 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Sep 2023 11:11:00 -0700 Subject: [PATCH 22/98] BACKPORT: f2fs: split initial and dynamic conditions for extent_cache Let's allocate the extent_cache tree without dynamic conditions to avoid a missing condition causing a panic as below. # create a file w/ a compressed flag # disable the compression # panic while updating extent_cache F2FS-fs (dm-64): Swapfile: last extent is not aligned to section F2FS-fs (dm-64): Swapfile (3) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(2097152 * N) Adding 124996k swap on ./swap-file. 
Priority:0 extents:2 across:17179494468k ================================================================== BUG: KASAN: null-ptr-deref in instrument_atomic_read_write out/common/include/linux/instrumented.h:101 [inline] BUG: KASAN: null-ptr-deref in atomic_try_cmpxchg_acquire out/common/include/asm-generic/atomic-instrumented.h:705 [inline] BUG: KASAN: null-ptr-deref in queued_write_lock out/common/include/asm-generic/qrwlock.h:92 [inline] BUG: KASAN: null-ptr-deref in __raw_write_lock out/common/include/linux/rwlock_api_smp.h:211 [inline] BUG: KASAN: null-ptr-deref in _raw_write_lock+0x5a/0x110 out/common/kernel/locking/spinlock.c:295 Write of size 4 at addr 0000000000000030 by task syz-executor154/3327 CPU: 0 PID: 3327 Comm: syz-executor154 Tainted: G O 5.10.185 #1 Hardware name: emulation qemu-x86/qemu-x86, BIOS 2023.01-21885-gb3cc1cd24d 01/01/2023 Call Trace: __dump_stack out/common/lib/dump_stack.c:77 [inline] dump_stack_lvl+0x17e/0x1c4 out/common/lib/dump_stack.c:118 __kasan_report+0x16c/0x260 out/common/mm/kasan/report.c:415 kasan_report+0x51/0x70 out/common/mm/kasan/report.c:428 kasan_check_range+0x2f3/0x340 out/common/mm/kasan/generic.c:186 __kasan_check_write+0x14/0x20 out/common/mm/kasan/shadow.c:37 instrument_atomic_read_write out/common/include/linux/instrumented.h:101 [inline] atomic_try_cmpxchg_acquire out/common/include/asm-generic/atomic-instrumented.h:705 [inline] queued_write_lock out/common/include/asm-generic/qrwlock.h:92 [inline] __raw_write_lock out/common/include/linux/rwlock_api_smp.h:211 [inline] _raw_write_lock+0x5a/0x110 out/common/kernel/locking/spinlock.c:295 __drop_extent_tree+0xdf/0x2f0 out/common/fs/f2fs/extent_cache.c:1155 f2fs_drop_extent_tree+0x17/0x30 out/common/fs/f2fs/extent_cache.c:1172 f2fs_insert_range out/common/fs/f2fs/file.c:1600 [inline] f2fs_fallocate+0x19fd/0x1f40 out/common/fs/f2fs/file.c:1764 vfs_fallocate+0x514/0x9b0 out/common/fs/open.c:310 ksys_fallocate out/common/fs/open.c:333 [inline] __do_sys_fallocate 
out/common/fs/open.c:341 [inline] __se_sys_fallocate out/common/fs/open.c:339 [inline] __x64_sys_fallocate+0xb8/0x100 out/common/fs/open.c:339 do_syscall_64+0x35/0x50 out/common/arch/x86/entry/common.c:46 Bug: 323236756 Cc: stable@vger.kernel.org Fixes: 72840cccc0a1 ("f2fs: allocate the extent_cache by default") Reported-and-tested-by: syzbot+d342e330a37b48c094b7@syzkaller.appspotmail.com Signed-off-by: Jaegeuk Kim (cherry picked from commit f803982190f0265fd36cf84670aa6daefc2b0768) Change-Id: I584b9e37a4790baf5f6613778365b90f128bb765 --- fs/f2fs/extent_cache.c | 53 +++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0e2d49140c07..ad8dfac73bd4 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -74,40 +74,14 @@ static void __set_extent_info(struct extent_info *ei, } } -static bool __may_read_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, READ_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi)) - return false; - return S_ISREG(inode->i_mode); -} - -static bool __may_age_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, AGE_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return false; - if (file_is_cold(inode)) - return false; - - return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); -} - static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) - return __may_read_extent_tree(inode); - else if (type == EX_BLOCK_AGE) - return __may_age_extent_tree(inode); + return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) && + S_ISREG(inode->i_mode); + if (type == EX_BLOCK_AGE) + return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) && + 
(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)); return false; } @@ -120,7 +94,22 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; - return __init_may_extent_tree(inode, type); + if (!__init_may_extent_tree(inode, type)) + return false; + + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(F2FS_I_SB(inode))) + return false; + } else if (type == EX_BLOCK_AGE) { + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + } + return true; } static void __try_update_largest_extent(struct extent_tree *et, From 1225d7ed6c88881aaf342842e683fab14a314af9 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 5 Mar 2024 16:20:32 -0800 Subject: [PATCH 23/98] ANDROID: fuse-bpf: Fix readdir for getdents If you call getdents with a buffer size less than a page, entries can be skipped. This correctly sets the point to continue from. 
Bug: 325550828 Test: getdents with low buffer size Change-Id: I324e7e815d31742bd4e2d70c5d07c2b09a67a7c2 Signed-off-by: Daniel Rosenberg --- fs/fuse/backing.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c index 8b747a7bde58..8485a0efe3db 100644 --- a/fs/fuse/backing.c +++ b/fs/fuse/backing.c @@ -2331,8 +2331,11 @@ static int filldir(struct dir_context *ctx, const char *name, int namelen, return 0; } -static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx) +static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx, + loff_t next_offset) { + char *buffstart = buf; + while (nbytes >= FUSE_NAME_OFFSET) { struct fuse_dirent *dirent = (struct fuse_dirent *) buf; size_t reclen = FUSE_DIRENT_SIZE(dirent); @@ -2346,12 +2349,18 @@ static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx) ctx->pos = dirent->off; if (!dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, - dirent->type)) - break; + dirent->type)) { + // If we can't make any progress, user buffer is too small + if (buf == buffstart) + return -EINVAL; + else + return 0; + } buf += reclen; nbytes -= reclen; } + ctx->pos = next_offset; return 0; } @@ -2398,13 +2407,12 @@ void *fuse_readdir_finalize(struct fuse_bpf_args *fa, struct file *backing_dir = ff->backing_file; int err = 0; - err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx); + err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx, fro->offset); *force_again = !!fro->again; if (*force_again && !*allow_force) err = -EINVAL; - ctx->pos = fro->offset; - backing_dir->f_pos = fro->offset; + backing_dir->f_pos = ctx->pos; free_page((unsigned long) fa->out_args[1].value); return ERR_PTR(err); From b1f8c250264bae2108cb482b2fc63c8f045db106 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Mar 2024 23:24:00 +0000 Subject: [PATCH 24/98] ANDROID: fips140 - add option for debugging the 
integrity check There now have been two times where I've had to debug the fips140 integrity check failing due to a new type of runtime code patching. Debugging such issues requires dumping the text and rodata actually used for the integrity check and comparing them with the originals. Add a kconfig option to make this easier. Similar to CRYPTO_FIPS140_MOD_EVAL_TESTING, the production build won't use this. Bug: 188620248 Change-Id: I392de466ff31f999d65997dbc610e23e9eeca49d Signed-off-by: Eric Biggers --- crypto/Kconfig | 27 +++++++++++++++ crypto/fips140-module.c | 75 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index f41ddc91b7b2..b3728ad6ff79 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -58,6 +58,33 @@ config CRYPTO_FIPS140_MOD_EVAL_TESTING errors and support for a userspace interface to some of the module's services. This option should not be enabled in production builds. +config CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK + bool "Debug the integrity check in FIPS 140 module" + depends on CRYPTO_FIPS140_MOD + help + This option makes the FIPS 140 module provide debugfs files containing + the text and rodata that were used for the integrity check, i.e. the + runtime text and rodata with relocations and code patches unapplied. + This option also makes the module load even if the integrity check + fails so that these files can be used to debug the failure. (A + possible failure mode is that the kernel has added a new type of code + patching and the module needs to be updated to disable or unapply it.) + + This option must not be enabled in production builds. 
+ + Example commands for debugging an integrity check failure: + + adb root + adb shell mount debugfs -t debugfs /sys/kernel/debug + adb shell cp /sys/kernel/debug/fips140/{text,rodata} /data/local/tmp/ + adb pull /data/local/tmp/text text.checked + adb pull /data/local/tmp/rodata rodata.checked + llvm-objcopy -O binary --only-section=.text fips140.ko text.orig + llvm-objcopy -O binary --only-section=.rodata fips140.ko rodata.orig + for f in {text,rodata}.{orig,checked}; do xxd -g1 $f > $f.xxd; done + vimdiff text.{orig,checked}.xxd + vimdiff rodata.{orig,checked}.xxd + config CRYPTO_ALGAPI tristate select CRYPTO_ALGAPI2 diff --git a/crypto/fips140-module.c b/crypto/fips140-module.c index 6412ad6c1234..ed0442b49a43 100644 --- a/crypto/fips140-module.c +++ b/crypto/fips140-module.c @@ -23,6 +23,7 @@ #undef __DISABLE_EXPORTS #include +#include #include #include #include @@ -357,6 +358,67 @@ static void __init unapply_rodata_relocations(void *section, int section_size, } } +#ifdef CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK +static struct { + const void *text; + int textsize; + const void *rodata; + int rodatasize; +} saved_integrity_check_info; + +static ssize_t fips140_text_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(to, count, ppos, + saved_integrity_check_info.text, + saved_integrity_check_info.textsize); +} + +static ssize_t fips140_rodata_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(to, count, ppos, + saved_integrity_check_info.rodata, + saved_integrity_check_info.rodatasize); +} + +static const struct file_operations fips140_text_fops = { + .read = fips140_text_read, +}; + +static const struct file_operations fips140_rodata_fops = { + .read = fips140_rodata_read, +}; + +static void fips140_init_integrity_debug_files(const void *text, int textsize, + const void *rodata, + int rodatasize) +{ + struct dentry *dir; + + dir = 
debugfs_create_dir("fips140", NULL); + + saved_integrity_check_info.text = kmemdup(text, textsize, GFP_KERNEL); + saved_integrity_check_info.textsize = textsize; + if (saved_integrity_check_info.text) + debugfs_create_file("text", 0400, dir, NULL, + &fips140_text_fops); + + saved_integrity_check_info.rodata = kmemdup(rodata, rodatasize, + GFP_KERNEL); + saved_integrity_check_info.rodatasize = rodatasize; + if (saved_integrity_check_info.rodata) + debugfs_create_file("rodata", 0400, dir, NULL, + &fips140_rodata_fops); +} +#else /* CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK */ +static void fips140_init_integrity_debug_files(const void *text, int textsize, + const void *rodata, + int rodatasize) +{ +} +#endif /* !CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK */ + extern struct { u32 offset; u32 count; @@ -398,6 +460,9 @@ static bool __init check_fips140_module_hmac(void) offset_to_ptr(&fips140_rela_rodata.offset), fips140_rela_rodata.count); + fips140_init_integrity_debug_files(textcopy, textsize, + rodatacopy, rodatasize); + fips140_inject_integrity_failure(textcopy); tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); @@ -538,10 +603,14 @@ fips140_init(void) */ if (!check_fips140_module_hmac()) { - pr_crit("integrity check failed -- giving up!\n"); - goto panic; + if (!IS_ENABLED(CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK)) { + pr_crit("integrity check failed -- giving up!\n"); + goto panic; + } + pr_crit("ignoring integrity check failure due to debug mode\n"); + } else { + pr_info("integrity check passed\n"); } - pr_info("integrity check passed\n"); complete_all(&fips140_tests_done); From 7b301c70795e79f1a1cf632fad51d37a0c820232 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 8 Mar 2024 23:24:00 +0000 Subject: [PATCH 25/98] ANDROID: fips140 - fix integrity check by unapplying dynamic SCS Since the kernel now has dynamic Shadow Call Stack (SCS) enabled, on CPUs that don't support Pointer Authentication Codes (PAC) the kernel runtime-patches paciasp 
and autiasp instructions into instructions that push and pop from the shadow call stack. This includes instructions in loaded modules. This broke the fips140 integrity check which needs to know how to undo all text changes made by the module loader in order to re-create the original text. Fix this by updating fips140.ko to undo the dynamic SCS patching. Bug: 188620248 Change-Id: I992bcd6c34b3340c6489b40a125715e1304cb445 Signed-off-by: Eric Biggers --- crypto/fips140-module.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/crypto/fips140-module.c b/crypto/fips140-module.c index ed0442b49a43..cc5c457ddd29 100644 --- a/crypto/fips140-module.c +++ b/crypto/fips140-module.c @@ -358,6 +358,33 @@ static void __init unapply_rodata_relocations(void *section, int section_size, } } +enum { + PACIASP = 0xd503233f, + AUTIASP = 0xd50323bf, + SCS_PUSH = 0xf800865e, + SCS_POP = 0xf85f8e5e, +}; + +/* + * To make the integrity check work with dynamic Shadow Call Stack (SCS), + * replace all instructions that push or pop from the SCS with the Pointer + * Authentication Code (PAC) instructions that were present originally. 
+ */ +static void __init unapply_scs_patch(void *section, int section_size) +{ +#if defined(CONFIG_ARM64) && defined(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) + u32 *insns = section; + int i; + + for (i = 0; i < section_size / sizeof(insns[0]); i++) { + if (insns[i] == SCS_PUSH) + insns[i] = PACIASP; + else if (insns[i] == SCS_POP) + insns[i] = AUTIASP; + } +#endif +} + #ifdef CONFIG_CRYPTO_FIPS140_MOD_DEBUG_INTEGRITY_CHECK static struct { const void *text; @@ -460,6 +487,8 @@ static bool __init check_fips140_module_hmac(void) offset_to_ptr(&fips140_rela_rodata.offset), fips140_rela_rodata.count); + unapply_scs_patch(textcopy, textsize); + fips140_init_integrity_debug_files(textcopy, textsize, rodatacopy, rodatasize); From 483395b4453aa7e54e88f22bdb43e133bbc9dc1e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 13 Mar 2024 12:23:56 -0700 Subject: [PATCH 26/98] Revert "ANDROID: Add CONFIG_BLK_DEV_NULL_BLK=m to gki_defconfig" This reverts commit d217ccf7c8e03d9147de4a36ecc337b6a374a704. Debug drivers should not be included in the GKI kernel configuration. Hence this revert. 
Bug: 326456248 Change-Id: I18db9d07ad49b22f09b6b3414d39e6ed0a728d73 Signed-off-by: Bart Van Assche --- arch/arm64/configs/gki_defconfig | 1 - arch/x86/configs/gki_defconfig | 1 - modules.bzl | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index ccd5908ac73a..733f1f2da58a 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -316,7 +316,6 @@ CONFIG_ARM_SCPI_PROTOCOL=y # CONFIG_ARM_SCPI_POWER_DOMAIN is not set # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set CONFIG_GNSS=y -CONFIG_BLK_DEV_NULL_BLK=m CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index ca45275c81e1..2e3d924152bc 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -294,7 +294,6 @@ CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_CACHE is not set CONFIG_GNSS=y CONFIG_OF=y -CONFIG_BLK_DEV_NULL_BLK=m CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 diff --git a/modules.bzl b/modules.bzl index 8f8e07abcec9..d8c38eeb33a9 100644 --- a/modules.bzl +++ b/modules.bzl @@ -8,7 +8,6 @@ This module contains a full list of kernel modules _COMMON_GKI_MODULES_LIST = [ # keep sorted - "drivers/block/null_blk/null_blk.ko", "drivers/block/zram/zram.ko", "drivers/bluetooth/btbcm.ko", "drivers/bluetooth/btqca.ko", From 3fd32dc1716d904707b016615d32344d631256ac Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 14 Mar 2024 15:53:20 -0700 Subject: [PATCH 27/98] ANDROID: fix isolate_migratepages_range return value When [1] was cherry-picked from 5.10 into 5.15 kernel, it modified the variable used to store isolate_migratepages_block() return value like it was done in 5.10. However in 5.15 the variable used to store the return value is different. As a result, failure to isolate a block is not reported back to the caller. 
Fix by restoring the original code and using the right variable to store the return value. [1] ANDROID: mm: do not allow file-backed pages from CMA Bug: 326556976 Change-Id: I06900eb43de356584ff63acfe6e994f11610b494 Signed-off-by: Suren Baghdasaryan --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index ff09d8c4a836..cccb46701c23 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1269,7 +1269,7 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, block_end_pfn, cc->zone)) continue; - pfn = isolate_migratepages_block(&cc_ext, pfn, block_end_pfn, + ret = isolate_migratepages_block(&cc_ext, pfn, block_end_pfn, ISOLATE_UNEVICTABLE); if (ret) From 3673533a09b6998bd0e3eafd14c1d6457bc23529 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Fri, 8 Mar 2024 10:04:39 -0800 Subject: [PATCH 28/98] ANDROID: userfaultfd: add MMAP_TRYLOCK mode for COPY/ZEROPAGE In case mmap_lock is contended, it is possible that userspace can spend time performing other tasks rather than waiting in uninterruptible-sleep state for the lock to become available. Even if no other task is available, it is better to yield or sleep rather than adding contention to already contended lock. We introduce MMAP_TRYLOCK mode so that when possible, userspace can request to use mmap_read_trylock(), returning -EAGAIN if and when it fails. 
Bug: 320478828 Change-Id: I2d196fd317e054af03dbd35ac1b0c7634cb370dc Signed-off-by: Lokesh Gidra --- fs/userfaultfd.c | 9 ++++++--- include/linux/userfaultfd_k.h | 8 +++++--- include/uapi/linux/userfaultfd.h | 2 ++ mm/userfaultfd.c | 13 +++++++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b4c24753ec19..f8e138d903a2 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1764,7 +1764,9 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, ret = -EINVAL; if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src) goto out; - if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP)) + if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE| + UFFDIO_COPY_MODE_WP| + UFFDIO_COPY_MODE_MMAP_TRYLOCK)) goto out; if (mmget_not_zero(ctx->mm)) { ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, @@ -1815,13 +1817,14 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, if (ret) goto out; ret = -EINVAL; - if (uffdio_zeropage.mode & ~UFFDIO_ZEROPAGE_MODE_DONTWAKE) + if (uffdio_zeropage.mode & ~(UFFDIO_ZEROPAGE_MODE_DONTWAKE| + UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK)) goto out; if (mmget_not_zero(ctx->mm)) { ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len, - &ctx->mmap_changing); + &ctx->mmap_changing, uffdio_zeropage.mode); mmput(ctx->mm); } else { return -ESRCH; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 8ea2827a4eba..14fdc2dd1532 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -33,6 +33,9 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +static_assert(UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK == UFFDIO_COPY_MODE_MMAP_TRYLOCK); +#define UFFDIO_MODE_MMAP_TRYLOCK UFFDIO_COPY_MODE_MMAP_TRYLOCK + extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); @@ -65,9 
+68,8 @@ extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, atomic_t *mmap_changing, __u64 mode); extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, - unsigned long dst_start, - unsigned long len, - atomic_t *mmap_changing); + unsigned long dst_start, unsigned long len, + atomic_t *mmap_changing, __u64 mode); extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, atomic_t *mmap_changing); extern int mwriteprotect_range(struct mm_struct *dst_mm, diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 05b31d60acf6..a13fa043c092 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -237,6 +237,7 @@ struct uffdio_copy { * according to the uffdio_register.ioctls. */ #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) +#define UFFDIO_COPY_MODE_MMAP_TRYLOCK ((__u64)1<<63) __u64 mode; /* @@ -249,6 +250,7 @@ struct uffdio_copy { struct uffdio_zeropage { struct uffdio_range range; #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK ((__u64)1<<63) __u64 mode; /* diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index d4175821dd29..522d88bd73ef 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -519,14 +519,19 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, copied = 0; page = NULL; retry: - mmap_read_lock(dst_mm); + err = -EAGAIN; + if (mode & UFFDIO_MODE_MMAP_TRYLOCK) { + if (!mmap_read_trylock(dst_mm)) + goto out; + } else { + mmap_read_lock(dst_mm); + } /* * If memory mappings are changing because of non-cooperative * operation (e.g. 
mremap) running in parallel, bail out and * request the user to retry later */ - err = -EAGAIN; if (mmap_changing && atomic_read(mmap_changing)) goto out_unlock; @@ -668,10 +673,10 @@ ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, } ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start, - unsigned long len, atomic_t *mmap_changing) + unsigned long len, atomic_t *mmap_changing, __u64 mode) { return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE, - mmap_changing, 0); + mmap_changing, mode); } ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, From 96305e30e970c4676b89cfebd671934485107716 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Fri, 8 Mar 2024 10:59:56 -0800 Subject: [PATCH 29/98] ANDROID: userfaultfd: abort uffdio ops if mmap_lock is contended Check if the mmap_lock is contended when looping over the pages that are requested to be filled. When it is observed, we rely on the already existing mechanism to return bytes copied/filled and -EAGAIN as error. This helps by avoiding contention of mmap_lock for long running userfaultfd operations. The userspace can perform other tasks before retrying the operation for the remaining pages. Bug: 320478828 Change-Id: I6d485fd03c96a826956ee3962e58058be3cf81c1 Signed-off-by: Lokesh Gidra --- mm/userfaultfd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 522d88bd73ef..70e54462bbf2 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -624,6 +624,15 @@ retry: if (unlikely(err == -ENOENT)) { void *page_kaddr; + /* + * Return early due to mmap_lock contention only after + * some pages are copied to ensure that jank sensitive + * threads don't keep retrying for progress-critical + * pages. 
+ */ + if (copied && mmap_lock_is_contended(dst_mm)) + break; + mmap_read_unlock(dst_mm); BUG_ON(!page); @@ -648,6 +657,9 @@ retry: if (fatal_signal_pending(current)) err = -EINTR; + + if (mmap_lock_is_contended(dst_mm)) + err = -EAGAIN; } if (err) break; From 537e133918a2cd44b0d85190a915c04f2f5c54fd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Dec 2022 15:28:49 +0100 Subject: [PATCH 30/98] UPSTREAM: arm64: Apply dynamic shadow call stack patching in two passes Code patching for the dynamically enabled shadow call stack comes down to finding PACIASP and AUTIASP instructions -which behave as NOPs on cores that do not implement pointer authentication- and converting them into shadow call stack pushes and pops, respectively. Due to past bad experiences with the highly complex and overengineered DWARF standard that describes the unwind metadata that we are using to locate these instructions, let's make this patching logic a little bit more robust so that any issues with the unwind metadata detected at boot time can be dealt with gracefully. The DWARF annotations that are used for this are emitted at function granularity, and due to the fact that the instructions we are patching will simply behave as NOPs if left unpatched, we can abort on errors as long as we don't leave any functions in a half-patched state. So do a dry run of each FDE frame (covering a single function) before performing the actual patching, and give up if the DWARF metadata cannot be understood. 
Change-Id: Iea167b37a4d84e2b444189c7af939cf58d6dc9cf Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20221213142849.1629026-1-ardb@kernel.org Signed-off-by: Catalin Marinas (cherry picked from commit 54c968bec344b101ba3596f2544f0f3b4c1eef2f) Signed-off-by: Mark-PK Tsai --- arch/arm64/kernel/patch-scs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/patch-scs.c index 1b3da02d5b74..a1fe4b4ff591 100644 --- a/arch/arm64/kernel/patch-scs.c +++ b/arch/arm64/kernel/patch-scs.c @@ -130,7 +130,8 @@ struct eh_frame { static int noinstr scs_handle_fde_frame(const struct eh_frame *frame, bool fde_has_augmentation_data, - int code_alignment_factor) + int code_alignment_factor, + bool dry_run) { int size = frame->size - offsetof(struct eh_frame, opcodes) + 4; u64 loc = (u64)offset_to_ptr(&frame->initial_loc); @@ -184,7 +185,8 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame, break; case DW_CFA_negate_ra_state: - scs_patch_loc(loc - 4); + if (!dry_run) + scs_patch_loc(loc - 4); break; case 0x40 ... 0x7f: @@ -235,9 +237,12 @@ int noinstr scs_patch(const u8 eh_frame[], int size) } else { ret = scs_handle_fde_frame(frame, fde_has_augmentation_data, - code_alignment_factor); + code_alignment_factor, + true); if (ret) return ret; + scs_handle_fde_frame(frame, fde_has_augmentation_data, + code_alignment_factor, false); } p += sizeof(frame->size) + frame->size; From 865e6d9df1de5f0e55cf4a40bd8d2f922dbee8c3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:22:18 +0200 Subject: [PATCH 31/98] UPSTREAM: netfilter: nf_tables: disallow timeout for anonymous sets commit e26d3009efda338f19016df4175f354a9bd0a4ab upstream. Never used from userspace, disallow these parameters. 
Bug: 329205828 Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b7be6c737a179a76901c872f6b4c1d00552d9a1b) Signed-off-by: Lee Jones Change-Id: I3d8358a6dee3246e3ac56697dbb2be8fdc5f716f --- net/netfilter/nf_tables_api.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 715be2edc542..087cbccaa48a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4671,6 +4671,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4679,6 +4682,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } From 5854f4c2aff0f7975b9fdaaf034b550dcd296626 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Mon, 18 Mar 2024 11:05:06 +0000 Subject: [PATCH 32/98] ANDROID: KVM: arm64: Fix missing trace event for nVHE dyn HVCs The hyp event host_hcall was missing when a custom HVC runs. 
Bug: 278749606 Bug: 244543039 Bug: 244373730 Signed-off-by: Vincent Donnefort (cherry picked from https://android-review.googlesource.com/q/commit:a1836ffbea9fcb70fa9d49af7382b9343285036f) Merged-In: I760cab4fbd36a13ad262842880d9ec484f23fd22 Change-Id: I760cab4fbd36a13ad262842880d9ec484f23fd22 --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 995c3a840010..87ca8cd735ea 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -1334,7 +1334,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) hcall_t hfn; if (handle_host_dynamic_hcall(host_ctxt) == HCALL_HANDLED) - return; + goto end; /* * If pKVM has been initialised then reject any calls to the @@ -1359,7 +1359,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; hfn(host_ctxt); - +end: trace_host_hcall(id, 0); return; From ec86765bae82a842d58839fcdb6a70dfb310441f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Mar 2024 15:32:08 +0000 Subject: [PATCH 33/98] ANDROID: KVM: arm64: Fix TLB invalidation when coalescing into a block When coalescing a table into a block, the break-before-make sequence must invalidate the whole range of addresses translated by the entry in order to avoid the possibility of a TLB conflict. Fix the coalescing post-table walker so that the whole range of the old table is invalidated, rather than just the first address, since a refcount of 1 on the child page is not sufficient to ensure the absence of any valid mappings. 
Cc: Sebastian Ene Reported-by: Mostafa Saleh Fixes: 6b3810205304 ("ANDROID: KVM: arm64: Coalesce host stage2 entries on ownership reclaim") Bug: 331232642 Signed-off-by: Will Deacon Change-Id: I4c94f552e4385599ad88b1be50b69ffbafa64a9b --- arch/arm64/kvm/hyp/pgtable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 05b3645d09e8..d3bd18c4dca4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -885,7 +885,9 @@ static void stage2_coalesce_walk_table_post(u64 addr, u64 end, u32 level, * of the page table page. */ if (mm_ops->page_count(childp) == 1) { - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); + kvm_clear_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); + mm_ops->put_page(ptep); mm_ops->put_page(childp); } } From d154026d33f2eeceae73e82c7e8a1c83dcc28b97 Mon Sep 17 00:00:00 2001 From: Vilas Bhat Date: Thu, 28 Mar 2024 19:59:00 -0700 Subject: [PATCH 34/98] ANDROID: GKI: Update the ABI symbol list Update the pixel_watch symbol list. 
6 function symbol(s) added __module_get emergency_restart kernel_restart watchdog_init_timeout watchdog_register_device watchdog_unregister_device Bug: 329913683 Change-Id: I7bce78be9c642c7fea483aab25b1eb6ce15a232d Signed-off-by: Vilas Bhat --- android/abi_gki_aarch64_pixel_watch | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index f60a3092e140..a2877e136e4c 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -733,6 +733,7 @@ dump_stack __dynamic_dev_dbg __dynamic_pr_debug + emergency_restart enable_irq enable_percpu_irq ether_setup @@ -1092,6 +1093,7 @@ kernel_getsockname kernel_kobj kernel_recvmsg + kernel_restart kernel_sendmsg kernfs_find_and_get_ns kernfs_notify @@ -1272,6 +1274,7 @@ mod_node_page_state mod_timer mod_timer_pending + __module_get module_layout module_put __msecs_to_jiffies @@ -2457,6 +2460,9 @@ wakeup_source_unregister __wake_up_sync __warn_printk + watchdog_init_timeout + watchdog_register_device + watchdog_unregister_device wireless_nlevent_flush wireless_send_event woken_wake_function From f600c62d2552dba12611ba21fda137c094072eff Mon Sep 17 00:00:00 2001 From: "qinglin.li" Date: Fri, 29 Mar 2024 10:07:28 +0800 Subject: [PATCH 35/98] ANDROID: GKI: Update symbol list for Amlogic 12 function symbol(s) added 'struct backing_dev_info* bdi_alloc(int)' 'void bdi_put(struct backing_dev_info*)' 'int bdi_register(struct backing_dev_info*, const char*, ...)' 'void crypto_unregister_ahashes(struct ahash_alg*, int)' 'void deactivate_locked_super(struct super_block*)' 'loff_t fixed_size_llseek(struct file*, loff_t, int, loff_t)' 'void generic_shutdown_super(struct super_block*)' 'struct gpio_desc* gpiod_get_index_optional(struct device*, const char*, unsigned int, enum gpiod_flags)' 'int lookup_bdev(const char*, dev_t*)' 'struct nvmem_device* nvmem_register(const struct nvmem_config*)' 'void nvmem_unregister(struct nvmem_device*)' 
'struct super_block* sget_fc(struct fs_context*, int(*)(struct super_block*, struct fs_context*), int(*)(struct super_block*, struct fs_context*))' Bug: 331874739 Change-Id: Icaa620cd09b0cccb3a1075c1f0429355d90103f4 Signed-off-by: Qinglin Li --- android/abi_gki_aarch64.stg | 169 ++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_amlogic | 68 ++++++++++--- 2 files changed, 224 insertions(+), 13 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 68e2b49e33af..ea57d83b30e8 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -263850,6 +263850,12 @@ function { return_type_id: 0x48b5725f parameter_id: 0x0bf6beaf } +function { + id: 0x12663997 + return_type_id: 0x48b5725f + parameter_id: 0x0d8bad22 + parameter_id: 0x6720d32f +} function { id: 0x1267759f return_type_id: 0x48b5725f @@ -268413,6 +268419,11 @@ function { parameter_id: 0x1259e377 parameter_id: 0xe276adef } +function { + id: 0x1aa56a0d + return_type_id: 0x48b5725f + parameter_id: 0x28f4ee78 +} function { id: 0x1aab662c return_type_id: 0x48b5725f @@ -272566,6 +272577,14 @@ function { parameter_id: 0x1d19a9d5 parameter_id: 0x94ed3026 } +function { + id: 0x2d6430a1 + return_type_id: 0x27a7c613 + parameter_id: 0x18ea6ae3 + parameter_id: 0x27a7c613 + parameter_id: 0x6720d32f + parameter_id: 0x27a7c613 +} function { id: 0x2d64ae3e return_type_id: 0x27a7c613 @@ -272619,6 +272638,11 @@ function { parameter_id: 0x3760766d parameter_id: 0x3c38843b } +function { + id: 0x2eb1a24e + return_type_id: 0x145f7b00 + parameter_id: 0x3ddc5dec +} function { id: 0x2f31eea6 return_type_id: 0xa8fff47c @@ -274279,6 +274303,11 @@ function { parameter_id: 0x1dce0fdd parameter_id: 0x6d7f5ff6 } +function { + id: 0x5120d3a0 + return_type_id: 0x28f4ee78 + parameter_id: 0x6720d32f +} function { id: 0x513ab761 return_type_id: 0x92233392 @@ -281173,6 +281202,12 @@ function { parameter_id: 0x6720d32f parameter_id: 0x295c7202 } +function { + id: 0x92c581e2 + return_type_id: 
0x6720d32f + parameter_id: 0x3e10b518 + parameter_id: 0x3053e2de +} function { id: 0x92c58e2b return_type_id: 0x6720d32f @@ -284880,6 +284915,13 @@ function { parameter_id: 0x2dde023d parameter_id: 0x6720d32f } +function { + id: 0x976cd0e7 + return_type_id: 0x6720d32f + parameter_id: 0x28f4ee78 + parameter_id: 0x3e10b518 + parameter_id: 0xa52a0930 +} function { id: 0x977041a6 return_type_id: 0x6720d32f @@ -300509,6 +300551,13 @@ function { parameter_id: 0x38d23361 parameter_id: 0x3ea31487 } +function { + id: 0xf07462c6 + return_type_id: 0x26ee682a + parameter_id: 0x2f5fce77 + parameter_id: 0x2fb1b820 + parameter_id: 0x2fb1b820 +} function { id: 0xf078e232 return_type_id: 0x6d7f5ff6 @@ -313042,6 +313091,33 @@ elf_symbol { type_id: 0x915d9e6f full_name: "bdevname" } +elf_symbol { + id: 0x18c171b8 + name: "bdi_alloc" + is_defined: true + symbol_type: FUNCTION + crc: 0xad31d036 + type_id: 0x5120d3a0 + full_name: "bdi_alloc" +} +elf_symbol { + id: 0x8fb4b5f6 + name: "bdi_put" + is_defined: true + symbol_type: FUNCTION + crc: 0xe1b928fd + type_id: 0x1aa56a0d + full_name: "bdi_put" +} +elf_symbol { + id: 0x6bb4e80d + name: "bdi_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x435e0782 + type_id: 0x976cd0e7 + full_name: "bdi_register" +} elf_symbol { id: 0xcdba3a55 name: "bgpio_init" @@ -317803,6 +317879,15 @@ elf_symbol { type_id: 0x13fabadb full_name: "crypto_unregister_ahash" } +elf_symbol { + id: 0xa9d18a36 + name: "crypto_unregister_ahashes" + is_defined: true + symbol_type: FUNCTION + crc: 0xf36b1210 + type_id: 0x12663997 + full_name: "crypto_unregister_ahashes" +} elf_symbol { id: 0x883df740 name: "crypto_unregister_alg" @@ -318067,6 +318152,15 @@ elf_symbol { type_id: 0xcebcf7b7 full_name: "dbs_update" } +elf_symbol { + id: 0x02f83230 + name: "deactivate_locked_super" + is_defined: true + symbol_type: FUNCTION + crc: 0xdc36d757 + type_id: 0x1923cb99 + full_name: "deactivate_locked_super" +} elf_symbol { id: 0x36a5b3df name: "deactivate_task" @@ 
-327684,6 +327778,15 @@ elf_symbol { type_id: 0x9cec28b8 full_name: "firmware_request_nowarn" } +elf_symbol { + id: 0xbbba9aad + name: "fixed_size_llseek" + is_defined: true + symbol_type: FUNCTION + crc: 0xad5d5d5c + type_id: 0x2d6430a1 + full_name: "fixed_size_llseek" +} elf_symbol { id: 0xd211b195 name: "flow_block_cb_setup_simple" @@ -328734,6 +328837,15 @@ elf_symbol { type_id: 0x12e4741f full_name: "generic_read_dir" } +elf_symbol { + id: 0x798f83da + name: "generic_shutdown_super" + is_defined: true + symbol_type: FUNCTION + crc: 0xf832643f + type_id: 0x1923cb99 + full_name: "generic_shutdown_super" +} elf_symbol { id: 0x542785af name: "generic_write_checks" @@ -329936,6 +330048,15 @@ elf_symbol { type_id: 0x5f3cfa16 full_name: "gpiod_get_index" } +elf_symbol { + id: 0x226f81df + name: "gpiod_get_index_optional" + is_defined: true + symbol_type: FUNCTION + crc: 0x316800a4 + type_id: 0x5f3cfa16 + full_name: "gpiod_get_index_optional" +} elf_symbol { id: 0x4950fc9e name: "gpiod_get_optional" @@ -335829,6 +335950,15 @@ elf_symbol { type_id: 0x11cf2aed full_name: "logfc" } +elf_symbol { + id: 0xad10cf0f + name: "lookup_bdev" + is_defined: true + symbol_type: FUNCTION + crc: 0x34c7cdbc + type_id: 0x92c581e2 + full_name: "lookup_bdev" +} elf_symbol { id: 0x5868181f name: "lookup_page_ext" @@ -338662,6 +338792,24 @@ elf_symbol { type_id: 0x99bc47d8 full_name: "nvmem_device_write" } +elf_symbol { + id: 0x65d6ca81 + name: "nvmem_register" + is_defined: true + symbol_type: FUNCTION + crc: 0x9e85ac36 + type_id: 0x2eb1a24e + full_name: "nvmem_register" +} +elf_symbol { + id: 0x27081fa8 + name: "nvmem_unregister" + is_defined: true + symbol_type: FUNCTION + crc: 0xcc39c03e + type_id: 0x158f8f53 + full_name: "nvmem_unregister" +} elf_symbol { id: 0x458a15db name: "of_address_to_resource" @@ -349610,6 +349758,15 @@ elf_symbol { type_id: 0x9264424b full_name: "sg_zero_buffer" } +elf_symbol { + id: 0x12402fa7 + name: "sget_fc" + is_defined: true + symbol_type: FUNCTION + 
crc: 0x5c47772e + type_id: 0xf07462c6 + full_name: "sget_fc" +} elf_symbol { id: 0x84e1c494 name: "shash_free_singlespawn_instance" @@ -363342,6 +363499,9 @@ interface { symbol_id: 0xe860837e symbol_id: 0x76f82fb7 symbol_id: 0xc22bc12c + symbol_id: 0x18c171b8 + symbol_id: 0x8fb4b5f6 + symbol_id: 0x6bb4e80d symbol_id: 0xcdba3a55 symbol_id: 0x977cc973 symbol_id: 0x49201db3 @@ -363871,6 +364031,7 @@ interface { symbol_id: 0x77cf6687 symbol_id: 0x81600265 symbol_id: 0x57a4fa38 + symbol_id: 0xa9d18a36 symbol_id: 0x883df740 symbol_id: 0xeb9b8f1f symbol_id: 0x4d669c6d @@ -363900,6 +364061,7 @@ interface { symbol_id: 0xd8f17b20 symbol_id: 0xf94e1278 symbol_id: 0xf1e399f5 + symbol_id: 0x02f83230 symbol_id: 0x36a5b3df symbol_id: 0x7225f00d symbol_id: 0xe2150034 @@ -364968,6 +365130,7 @@ interface { symbol_id: 0x40cf5b57 symbol_id: 0xf882020f symbol_id: 0x24954a6b + symbol_id: 0xbbba9aad symbol_id: 0xd211b195 symbol_id: 0xba7efe3b symbol_id: 0x6e6c429b @@ -365084,6 +365247,7 @@ interface { symbol_id: 0x7f639ef1 symbol_id: 0x6bd69c06 symbol_id: 0x4c4073c3 + symbol_id: 0x798f83da symbol_id: 0x542785af symbol_id: 0xb45dfa4f symbol_id: 0x86da67c0 @@ -365217,6 +365381,7 @@ interface { symbol_id: 0x80497778 symbol_id: 0xb92a798d symbol_id: 0xaca51db1 + symbol_id: 0x226f81df symbol_id: 0x4950fc9e symbol_id: 0x74240b4d symbol_id: 0xe4150b00 @@ -365870,6 +366035,7 @@ interface { symbol_id: 0xc41c43d2 symbol_id: 0x02bffe2a symbol_id: 0x06e891b4 + symbol_id: 0xad10cf0f symbol_id: 0x5868181f symbol_id: 0x493ce9fc symbol_id: 0x531bbd3e @@ -366184,6 +366350,8 @@ interface { symbol_id: 0x23113228 symbol_id: 0xc1013b0e symbol_id: 0x28ba8e50 + symbol_id: 0x65d6ca81 + symbol_id: 0x27081fa8 symbol_id: 0x458a15db symbol_id: 0x058addcb symbol_id: 0x93579cdd @@ -367400,6 +367568,7 @@ interface { symbol_id: 0x7c74f0ec symbol_id: 0x8525915d symbol_id: 0x4c3efe30 + symbol_id: 0x12402fa7 symbol_id: 0x84e1c494 symbol_id: 0xc2e7cb56 symbol_id: 0x7f3b48aa diff --git a/android/abi_gki_aarch64_amlogic 
b/android/abi_gki_aarch64_amlogic index 3fb321828154..017df1429251 100644 --- a/android/abi_gki_aarch64_amlogic +++ b/android/abi_gki_aarch64_amlogic @@ -28,6 +28,7 @@ arm64_use_ng_mappings __arm_smccc_hvc __arm_smccc_smc + async_schedule_node atomic_notifier_call_chain atomic_notifier_chain_register atomic_notifier_chain_unregister @@ -35,6 +36,9 @@ backlight_device_unregister balance_dirty_pages_ratelimited bcmp + bdi_alloc + bdi_put + bdi_register bio_add_page bio_alloc_bioset bio_associate_blkg @@ -82,8 +86,8 @@ bpf_master_redirect_enabled_key bpf_prog_put bpf_stats_enabled_key - bpf_trace_run10 bpf_trace_run1 + bpf_trace_run10 bpf_trace_run2 bpf_trace_run3 bpf_trace_run4 @@ -91,6 +95,7 @@ bpf_trace_run6 bpf_trace_run7 bpf_trace_run8 + bpf_trace_run9 bpf_warn_invalid_xdp_action __bread_gfp __brelse @@ -256,6 +261,7 @@ crypto_skcipher_encrypt crypto_skcipher_setkey crypto_unregister_ahash + crypto_unregister_ahashes crypto_unregister_shash crypto_unregister_skcipher __crypto_xor @@ -265,6 +271,7 @@ _ctype current_time current_umask + deactivate_locked_super debugfs_attr_read debugfs_attr_write debugfs_create_blob @@ -585,6 +592,7 @@ drm_connector_cleanup drm_connector_init drm_connector_list_iter_begin + drm_connector_list_iter_end drm_connector_list_iter_next drm_connector_set_vrr_capable_property drm_connector_unregister @@ -695,6 +703,7 @@ drm_state_dump drm_universal_plane_init drm_vblank_init + drm_wait_one_vblank drm_writeback_connector_init drm_writeback_queue_job drm_writeback_signal_completion @@ -728,9 +737,9 @@ extcon_get_extcon_dev extcon_get_state extcon_register_notifier - extcon_unregister_notifier extcon_set_state extcon_set_state_sync + extcon_unregister_notifier fasync_helper fault_in_iov_iter_readable __fdget @@ -755,6 +764,7 @@ find_vm_area find_vpid finish_wait + fixed_size_llseek flow_block_cb_setup_simple flow_rule_match_basic flow_rule_match_ipv4_addrs @@ -808,6 +818,7 @@ generic_handle_irq generic_permission generic_read_dir + 
generic_shutdown_super generic_write_checks generic_write_end genlmsg_multicast_allns @@ -819,13 +830,13 @@ genphy_handle_interrupt_no_ack genphy_read_abilities genphy_read_mmd_unsupported - genphy_write_mmd_unsupported genphy_read_status genphy_restart_aneg genphy_resume genphy_soft_reset genphy_suspend genphy_update_link + genphy_write_mmd_unsupported gen_pool_add_owner gen_pool_alloc_algo_owner gen_pool_avail @@ -873,6 +884,7 @@ gpiod_direction_output_raw gpiod_get gpiod_get_index + gpiod_get_index_optional gpiod_get_optional gpiod_get_raw_value gpiod_get_raw_value_cansleep @@ -1034,8 +1046,8 @@ iwe_stream_add_event iwe_stream_add_point iwe_stream_add_value - jiffies_64 jiffies + jiffies_64 jiffies_to_msecs jiffies_to_usecs kasan_flag_enabled @@ -1144,12 +1156,18 @@ __lock_page lockref_get logfc + log_post_read_mmio + log_post_write_mmio + log_read_mmio + log_write_mmio + lookup_bdev loops_per_jiffy mac_pton make_bad_inode mangle_path mark_buffer_dirty __mark_inode_dirty + match_string mbox_chan_received_data mbox_chan_txdone mbox_controller_register @@ -1161,11 +1179,11 @@ mdiobus_alloc_size mdiobus_free mdiobus_get_phy - mdiobus_read __mdiobus_read + mdiobus_read mdiobus_unregister - mdiobus_write __mdiobus_write + mdiobus_write mdio_device_create mdio_device_free media_create_pad_link @@ -1184,8 +1202,8 @@ memparse memremap mem_section - memset64 memset + memset64 __memset_io memstart_addr memunmap @@ -1204,8 +1222,8 @@ mmc_cqe_request_done mmc_detect_change mmc_free_host - mmc_gpio_get_cd mmc_gpiod_request_cd + mmc_gpio_get_cd mmc_of_parse mmc_regulator_get_supply mmc_regulator_set_ocr @@ -1223,6 +1241,7 @@ mod_delayed_work_on mod_node_page_state mod_timer + __module_get module_layout module_put mpage_readahead @@ -1245,6 +1264,7 @@ napi_gro_receive __napi_schedule napi_schedule_prep + __ndelay netdev_alert __netdev_alloc_skb netdev_err @@ -1285,8 +1305,8 @@ nla_find nla_memcpy __nla_parse - nla_put_64bit nla_put + nla_put_64bit nla_put_nohdr nla_reserve 
nla_strscpy @@ -1301,6 +1321,8 @@ ns_to_timespec64 __num_online_cpus nvmem_cell_read + nvmem_register + nvmem_unregister of_address_to_resource of_alias_get_id of_clk_add_provider @@ -1309,6 +1331,7 @@ of_clk_hw_onecell_get of_clk_set_defaults of_clk_src_onecell_get + of_count_phandle_with_args of_device_get_match_data of_device_is_available of_device_is_compatible @@ -1348,6 +1371,8 @@ of_match_node __of_mdiobus_register of_mdio_find_bus + of_n_addr_cells + of_n_size_cells of_parse_phandle of_parse_phandle_with_args of_phy_is_fixed_link @@ -1359,9 +1384,11 @@ of_property_read_string_helper of_property_read_u32_index of_property_read_u64 + of_property_read_u64_index of_property_read_variable_u16_array of_property_read_variable_u32_array of_property_read_variable_u8_array + of_prop_next_string of_prop_next_u32 of_pwm_xlate_with_flags of_reserved_mem_device_init_by_idx @@ -1441,6 +1468,7 @@ pci_unlock_rescan_remove pci_write_config_byte pci_write_config_dword + pcpu_nr_pages PDE_DATA __per_cpu_offset perf_trace_buf_alloc @@ -1478,8 +1506,8 @@ phylink_start phylink_stop phylink_suspend - phy_modify __phy_modify + phy_modify phy_modify_changed phy_modify_paged phy_modify_paged_changed @@ -1563,6 +1591,7 @@ __printk_ratelimit proc_create proc_create_data + proc_create_single_data proc_dointvec proc_get_parent_data proc_mkdir @@ -1803,10 +1832,12 @@ sg_alloc_table_from_pages_segment sg_copy_from_buffer sg_copy_to_buffer + sget_fc sg_free_table sg_init_one sg_init_table sg_nents + sg_nents_for_len sg_next __sg_page_iter_next __sg_page_iter_start @@ -1814,6 +1845,8 @@ sg_pcopy_to_buffer show_class_attr_string show_regs + si_mem_available + si_meminfo simple_attr_open simple_attr_read simple_attr_release @@ -1993,12 +2026,14 @@ sysctl_sched_latency sysfs_create_bin_file sysfs_create_file_ns + sysfs_create_files sysfs_create_group sysfs_create_link sysfs_emit __sysfs_match_string sysfs_remove_bin_file sysfs_remove_file_ns + sysfs_remove_files sysfs_remove_group 
sysfs_remove_link sysfs_streq @@ -2028,6 +2063,7 @@ time64_to_tm timespec64_to_jiffies _totalram_pages + touch_softlockup_watchdog trace_clock_local trace_event_buffer_commit trace_event_buffer_reserve @@ -2047,9 +2083,11 @@ __traceiter_android_rvh_place_entity __traceiter_android_rvh_replace_next_task_fair __traceiter_android_rvh_schedule + __traceiter_android_rvh_select_task_rq_fair __traceiter_android_rvh_select_task_rq_rt __traceiter_android_rvh_tick_entry __traceiter_android_vh_alloc_pages_entry + __traceiter_android_vh_alloc_pages_slowpath __traceiter_android_vh_calc_alloc_flags __traceiter_android_vh_cma_alloc_bypass __traceiter_android_vh_cma_drain_all_pages_bypass @@ -2097,9 +2135,11 @@ __tracepoint_android_rvh_place_entity __tracepoint_android_rvh_replace_next_task_fair __tracepoint_android_rvh_schedule + __tracepoint_android_rvh_select_task_rq_fair __tracepoint_android_rvh_select_task_rq_rt __tracepoint_android_rvh_tick_entry __tracepoint_android_vh_alloc_pages_entry + __tracepoint_android_vh_alloc_pages_slowpath __tracepoint_android_vh_calc_alloc_flags __tracepoint_android_vh_cma_alloc_bypass __tracepoint_android_vh_cma_drain_all_pages_bypass @@ -2209,16 +2249,16 @@ usb_autopm_get_interface usb_autopm_put_interface usb_control_msg - usb_create_hcd __usb_create_hcd + usb_create_hcd usb_debug_root usb_decode_ctrl usb_del_gadget usb_del_gadget_udc usb_deregister usb_deregister_dev - usb_disabled usb_disable_autosuspend + usb_disabled usb_driver_claim_interface usb_driver_release_interface usb_ep_set_maxpacket_limit @@ -2239,9 +2279,9 @@ usb_get_maximum_ssp_rate usb_get_role_switch_default_mode usb_hcd_check_unlink_urb - usb_hc_died usb_hcd_end_port_resume usb_hcd_giveback_urb + usb_hc_died usb_hcd_irq usb_hcd_is_primary_hcd usb_hcd_link_urb_to_ep @@ -2364,6 +2404,7 @@ __video_register_device video_unregister_device vmalloc + vmalloc_nr_pages vmalloc_to_page vmalloc_to_pfn vmalloc_user @@ -2371,6 +2412,7 @@ vm_event_states vmf_insert_pfn_prot vm_insert_page 
+ vm_node_stat vm_unmap_aliases vm_zone_stat vprintk From 3b3c1c80e8944f6439493be170b5fe2fb608818e Mon Sep 17 00:00:00 2001 From: Ben Fennema Date: Fri, 29 Mar 2024 12:45:37 -0700 Subject: [PATCH 36/98] ANDROID: GKI: Update the ABI symbol list Update the pixel_watch symbol list. Bug: 330275264 Change-Id: I843394f80d93a3f3d1a33846d1af4f189803b829 Signed-off-by: Ben Fennema --- android/abi_gki_aarch64_pixel_watch | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/abi_gki_aarch64_pixel_watch b/android/abi_gki_aarch64_pixel_watch index a2877e136e4c..374639c8ee5b 100644 --- a/android/abi_gki_aarch64_pixel_watch +++ b/android/abi_gki_aarch64_pixel_watch @@ -1133,6 +1133,7 @@ kobject_uevent_env kobj_sysfs_ops krealloc + kset_create_and_add ksize ksoftirqd kstat @@ -2078,6 +2079,7 @@ sysfs_remove_link sysfs_remove_link_from_group sysfs_streq + sysfs_update_group sysrq_mask system_32bit_el0_cpumask system_freezable_wq From 668dfb812db727a038035a2386057811f5f08fb0 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 30 Mar 2024 19:01:14 +0000 Subject: [PATCH 37/98] FROMLIST: binder: check offset alignment in binder_get_object() Commit 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") introduced changes to how binder objects are copied. In doing so, it unintentionally removed an offset alignment check done through calls to binder_alloc_copy_from_buffer() -> check_buffer(). These calls were replaced in binder_get_object() with copy_from_user(), so now an explicit offset alignment check is needed here. This avoids later complications when unwinding the objects gets harder. It is worth noting this check existed prior to commit 7a67a39320df ("binder: add function to copy binder object from buffer"), likely removed due to redundancy at the time. 
Fixes: 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") Cc: Signed-off-by: Carlos Llamas Bug: 320661088 Link: https://lore.kernel.org/all/20240330190115.1877819-1-cmllamas@google.com/ Change-Id: Iaddabaa28de7ba7b7d35dbb639d38ca79dbc5077 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 57295e600607..29f6a1bd739c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1915,8 +1915,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; From 3de9177e8168658fe7a28296affc80974496a24a Mon Sep 17 00:00:00 2001 From: erinwang Date: Mon, 25 Mar 2024 10:32:19 +0800 Subject: [PATCH 38/98] ANDROID: GKI: Update symbol list for lenovo 2 function symbol(s) added 'unsigned long* devm_bitmap_zalloc(struct device*, unsigned int, gfp_t)' 'void tracing_on()' Bug: 331118893 Change-Id: I3baa632e1ebb7cb09d4651d656f0dcc6cda21cd3 Signed-off-by: erinwang --- android/abi_gki_aarch64.stg | 27 +++++++++++++++++++++++++++ android/abi_gki_aarch64_lenovo | 3 +++ 2 files changed, 30 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index ea57d83b30e8..7d1c73e8e26d 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -298157,6 +298157,13 @@ function { return_type_id: 0x31b5a66f parameter_id: 0x2668e644 } +function { + id: 0xaa7f8be4 + return_type_id: 0x064d6086 + parameter_id: 0x0258f96e + parameter_id: 0x4585663f + parameter_id: 0xf1a6dfed +} function { id: 0xaa8f5c2d return_type_id: 0xf435685e @@ -320168,6 +320175,15 @@ 
elf_symbol { type_id: 0x10cc1a70 full_name: "devm_backlight_device_unregister" } +elf_symbol { + id: 0xff3a9dde + name: "devm_bitmap_zalloc" + is_defined: true + symbol_type: FUNCTION + crc: 0x5e53ca9c + type_id: 0xaa7f8be4 + full_name: "devm_bitmap_zalloc" +} elf_symbol { id: 0x01a0cc1d name: "devm_blk_crypto_profile_init" @@ -355052,6 +355068,15 @@ elf_symbol { type_id: 0x10985193 full_name: "tracing_off" } +elf_symbol { + id: 0x6521b803 + name: "tracing_on" + is_defined: true + symbol_type: FUNCTION + crc: 0x10138352 + type_id: 0x10985193 + full_name: "tracing_on" +} elf_symbol { id: 0x3f07269b name: "truncate_inode_pages" @@ -364285,6 +364310,7 @@ interface { symbol_id: 0xa2a47944 symbol_id: 0x97ae66e9 symbol_id: 0x206986c6 + symbol_id: 0xff3a9dde symbol_id: 0x01a0cc1d symbol_id: 0x32439a1e symbol_id: 0x70cc5ef2 @@ -368156,6 +368182,7 @@ interface { symbol_id: 0x3df2f359 symbol_id: 0x33172d21 symbol_id: 0x54bbaa46 + symbol_id: 0x6521b803 symbol_id: 0x3f07269b symbol_id: 0x3c7c6ce9 symbol_id: 0x7a43283c diff --git a/android/abi_gki_aarch64_lenovo b/android/abi_gki_aarch64_lenovo index 9173bee8d6d5..7a8be12c77d4 100644 --- a/android/abi_gki_aarch64_lenovo +++ b/android/abi_gki_aarch64_lenovo @@ -257,6 +257,7 @@ _dev_info __dev_kfree_skb_any devm_add_action + devm_bitmap_zalloc devm_clk_get devm_free_irq devm_fwnode_gpiod_get_index @@ -1330,6 +1331,8 @@ __tracepoint_android_vh_use_amu_fie __tracepoint_binder_transaction_received __tracepoint_cpu_frequency_limits + tracing_off + tracing_on try_module_get __ubsan_handle_cfi_check_fail_abort __udelay From 6a45518094ac67ec227df2f823b0726f89aa7c46 Mon Sep 17 00:00:00 2001 From: "qinglin.li" Date: Wed, 3 Apr 2024 17:46:52 +0800 Subject: [PATCH 39/98] ANDROID: GKI: Update symbol list for Amlogic 1 function symbol(s) added 'int __traceiter_android_rvh_set_sugov_update(void*, struct sugov_policy*, unsigned int, bool*)' 1 variable symbol(s) added 'struct tracepoint __tracepoint_android_rvh_set_sugov_update' Bug: 332649159 
Change-Id: Id8efeb5dd38638e2b08a5f2bad5db6744cc0fb15 Signed-off-by: Qinglin Li --- android/abi_gki_aarch64.stg | 168 ++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_amlogic | 4 + 2 files changed, 172 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 7d1c73e8e26d..72278aa6623b 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -478,6 +478,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x2d64ae3e } +pointer_reference { + id: 0x01ce1d56 + kind: POINTER + pointee_type_id: 0x2d7893c6 +} pointer_reference { id: 0x01cecb1b kind: POINTER @@ -20298,6 +20303,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x95c98491 } +pointer_reference { + id: 0x2fe3ab52 + kind: POINTER + pointee_type_id: 0x95ce4bd4 +} pointer_reference { id: 0x2fe3d7ba kind: POINTER @@ -52644,6 +52654,12 @@ member { type_id: 0xc9082b19 offset: 64 } +member { + id: 0xd996d7d9 + name: "cached_raw_freq" + type_id: 0x4585663f + offset: 480 +} member { id: 0xd8c78590 name: "cached_refs" @@ -85332,6 +85348,12 @@ member { type_id: 0x92233392 offset: 1408 } +member { + id: 0x9a51f6de + name: "freq_update_delay_ns" + type_id: 0x2e0f9112 + offset: 384 +} member { id: 0x4cc7367d name: "freqm" @@ -101067,6 +101089,12 @@ member { type_id: 0xb95bf932 offset: 640 } +member { + id: 0xed01f807 + name: "irq_work" + type_id: 0xb95bf932 + offset: 512 +} member { id: 0xedbbc0ae name: "irq_work" @@ -104174,6 +104202,12 @@ member { type_id: 0xedf277ba offset: 9600 } +member { + id: 0x5fee299a + name: "last_freq_update_time" + type_id: 0x92233392 + offset: 320 +} member { id: 0x280cf0ef name: "last_func" @@ -106114,6 +106148,12 @@ member { type_id: 0x0eafcf90 offset: 384 } +member { + id: 0xa1f988ed + name: "limits_changed" + type_id: 0x6d7f5ff6 + offset: 1928 +} member { id: 0xdd5d93e6 name: "line" @@ -120936,6 +120976,12 @@ member { offset: 1360 bitsize: 1 } +member { + id: 0x89ec1f61 + name: "need_freq_update" + type_id: 0x6d7f5ff6 
+ offset: 1936 +} member { id: 0x2ddb9f52 name: "need_mb" @@ -121980,6 +122026,12 @@ member { type_id: 0x6720d32f offset: 1536 } +member { + id: 0x50805cf7 + name: "next_freq" + type_id: 0x4585663f + offset: 448 +} member { id: 0x4d6204ba name: "next_hash" @@ -144133,6 +144185,12 @@ member { type_id: 0x2d154530 offset: 2752 } +member { + id: 0xeb113bfe + name: "rate_limit_us" + type_id: 0x4585663f + offset: 1344 +} member { id: 0x4134711d name: "rate_list" @@ -173533,6 +173591,12 @@ member { type_id: 0x1d19a9d5 offset: 128 } +member { + id: 0xfc7d4753 + name: "thread" + type_id: 0x1d19a9d5 + offset: 1856 +} member { id: 0xfc7d4b89 name: "thread" @@ -177145,6 +177209,18 @@ member { type_id: 0x11d941b8 offset: 384 } +member { + id: 0x14692088 + name: "tunables" + type_id: 0x2fe3ab52 + offset: 64 +} +member { + id: 0x0c096620 + name: "tunables_hook" + type_id: 0xd3c80119 + offset: 128 +} member { id: 0x3203c9f3 name: "tuner" @@ -181378,6 +181454,12 @@ member { type_id: 0x0aee7ba0 offset: 896 } +member { + id: 0x550062ea + name: "update_lock" + type_id: 0xc8b17aa7 + offset: 256 +} member { id: 0x556f10f2 name: "update_lock" @@ -187998,6 +188080,12 @@ member { type_id: 0x3835dcc0 offset: 9600 } +member { + id: 0xd6c16cd0 + name: "work" + type_id: 0x3835dcc0 + offset: 704 +} member { id: 0xd6c16eed name: "work" @@ -188134,6 +188222,12 @@ member { type_id: 0x6d7f5ff6 offset: 1480 } +member { + id: 0x98b9adc1 + name: "work_in_progress" + type_id: 0x6d7f5ff6 + offset: 1920 +} member { id: 0x44ded5a9 name: "work_irq_change" @@ -188170,6 +188264,12 @@ member { type_id: 0xd3c80119 offset: 3072 } +member { + id: 0x4620b8c5 + name: "work_lock" + type_id: 0xa7c362b0 + offset: 1024 +} member { id: 0x46746957 name: "work_lock" @@ -188297,6 +188397,12 @@ member { type_id: 0x1d19a9d5 offset: 704 } +member { + id: 0xff970cdd + name: "worker" + type_id: 0xf87d4486 + offset: 1408 +} member { id: 0x3c046673 name: "worker_done" @@ -235490,6 +235596,40 @@ struct_union { member_id: 
0x866b1acd } } +struct_union { + id: 0x2d7893c6 + kind: STRUCT + name: "sugov_policy" + definition { + bytesize: 248 + member_id: 0x60e88f7d + member_id: 0x14692088 + member_id: 0x0c096620 + member_id: 0x550062ea + member_id: 0x5fee299a + member_id: 0x9a51f6de + member_id: 0x50805cf7 + member_id: 0xd996d7d9 + member_id: 0xed01f807 + member_id: 0xd6c16cd0 + member_id: 0x4620b8c5 + member_id: 0xff970cdd + member_id: 0xfc7d4753 + member_id: 0x98b9adc1 + member_id: 0xa1f988ed + member_id: 0x89ec1f61 + } +} +struct_union { + id: 0x95ce4bd4 + kind: STRUCT + name: "sugov_tunables" + definition { + bytesize: 176 + member_id: 0x2ae4a5b7 + member_id: 0xeb113bfe + } +} struct_union { id: 0xb1f94634 kind: STRUCT @@ -290900,6 +291040,14 @@ function { parameter_id: 0x15c389f6 parameter_id: 0xd25db1d3 } +function { + id: 0x9bb9a470 + return_type_id: 0x6720d32f + parameter_id: 0x18bd6530 + parameter_id: 0x01ce1d56 + parameter_id: 0x4585663f + parameter_id: 0x11cfee5a +} function { id: 0x9bba5387 return_type_id: 0x6720d32f @@ -305692,6 +305840,15 @@ elf_symbol { type_id: 0x9b745c5a full_name: "__traceiter_android_rvh_set_skip_swapcache_flags" } +elf_symbol { + id: 0xdbc1c244 + name: "__traceiter_android_rvh_set_sugov_update" + is_defined: true + symbol_type: FUNCTION + crc: 0x4b0174aa + type_id: 0x9bb9a470 + full_name: "__traceiter_android_rvh_set_sugov_update" +} elf_symbol { id: 0xc6a28b4a name: "__traceiter_android_rvh_set_task_cpu" @@ -308959,6 +309116,15 @@ elf_symbol { type_id: 0x18ccbd2c full_name: "__tracepoint_android_rvh_set_skip_swapcache_flags" } +elf_symbol { + id: 0xe665d9e6 + name: "__tracepoint_android_rvh_set_sugov_update" + is_defined: true + symbol_type: OBJECT + crc: 0xfaf475a5 + type_id: 0x18ccbd2c + full_name: "__tracepoint_android_rvh_set_sugov_update" +} elf_symbol { id: 0xc5049f7c name: "__tracepoint_android_rvh_set_task_cpu" @@ -362701,6 +362867,7 @@ interface { symbol_id: 0x9c2c2d71 symbol_id: 0x615c3dcf symbol_id: 0x96033ccd + symbol_id: 0xdbc1c244 
symbol_id: 0xc6a28b4a symbol_id: 0x9b0cc890 symbol_id: 0x559e0725 @@ -363064,6 +363231,7 @@ interface { symbol_id: 0xbe9f9d4f symbol_id: 0xde470f79 symbol_id: 0xa88f0d7b + symbol_id: 0xe665d9e6 symbol_id: 0xc5049f7c symbol_id: 0x42fff08e symbol_id: 0x74f29f73 diff --git a/android/abi_gki_aarch64_amlogic b/android/abi_gki_aarch64_amlogic index 017df1429251..a201089a1294 100644 --- a/android/abi_gki_aarch64_amlogic +++ b/android/abi_gki_aarch64_amlogic @@ -199,6 +199,8 @@ cpu_all_bits cpu_bit_bitmap cpufreq_boost_enabled + cpufreq_cpu_get + cpufreq_cpu_put cpufreq_generic_attr cpufreq_generic_frequency_table_verify cpufreq_generic_suspend @@ -2085,6 +2087,7 @@ __traceiter_android_rvh_schedule __traceiter_android_rvh_select_task_rq_fair __traceiter_android_rvh_select_task_rq_rt + __traceiter_android_rvh_set_sugov_update __traceiter_android_rvh_tick_entry __traceiter_android_vh_alloc_pages_entry __traceiter_android_vh_alloc_pages_slowpath @@ -2137,6 +2140,7 @@ __tracepoint_android_rvh_schedule __tracepoint_android_rvh_select_task_rq_fair __tracepoint_android_rvh_select_task_rq_rt + __tracepoint_android_rvh_set_sugov_update __tracepoint_android_rvh_tick_entry __tracepoint_android_vh_alloc_pages_entry __tracepoint_android_vh_alloc_pages_slowpath From 948f42ca2bc502a042a545ee8a900956f60c19ec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Mar 2024 10:02:41 +0100 Subject: [PATCH 40/98] UPSTREAM: netfilter: nft_set_pipapo: release elements in clone only from destroy path [ Upstream commit b0e256f3dd2ba6532f37c5c22e07cb07a36031ee ] Clone already always provides a current view of the lookup table, use it to destroy the set, otherwise it is possible to destroy elements twice. This fix requires: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") which came after: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path"). 
Bug: 330876672 Fixes: 9827a0e6e23b ("netfilter: nft_set_pipapo: release elements in clone from abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (cherry picked from commit ff90050771412b91e928093ccd8736ae680063c2) Signed-off-by: Lee Jones Change-Id: I8c0811e69f82681c7fcfdca1111f1702e27bb80e --- net/netfilter/nft_set_pipapo.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4e1cc31729b8..050672ccfa7e 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2234,8 +2234,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(ctx, set, m); - #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); #endif @@ -2250,8 +2248,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->clone) { m = priv->clone; - if (priv->dirty) - nft_set_pipapo_match_destroy(ctx, set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); From cd4da4b748147ccf78714e8f5acaed36ac9490d9 Mon Sep 17 00:00:00 2001 From: Carlos Galo Date: Mon, 8 Apr 2024 18:29:10 +0000 Subject: [PATCH 41/98] Revert "FROMGIT: mm: update mark_victim tracepoints fields" This reverts commit b9e9a2c0094d43a2f05e230a3a6db3accd1ca60d. 
Reason for revert: b/331214192 Signed-off-by: Carlos Galo Change-Id: I5895d3b8a0577f7aa67a8fbab81991ced49f8eab --- include/trace/events/oom.h | 19 ++++--------------- mm/oom_kill.c | 6 +----- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 3c5941da8075..26a11e4a2c36 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -72,30 +72,19 @@ TRACE_EVENT(reclaim_retry_zone, ); TRACE_EVENT(mark_victim, - TP_PROTO(struct task_struct *task, uid_t uid), + TP_PROTO(int pid), - TP_ARGS(task, uid), + TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) - __field(uid_t, uid) - __string(comm, task->comm) - __field(short, oom_score_adj) ), TP_fast_assign( - __entry->pid = task->pid; - __entry->uid = uid; - __assign_str(comm, task->comm); - __entry->oom_score_adj = task->signal->oom_score_adj; + __entry->pid = pid; ), - TP_printk("pid=%d uid=%u comm=%s oom_score_adj=%hd", - __entry->pid, - __entry->uid, - __get_str(comm), - __entry->oom_score_adj - ) + TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(wake_reaper, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 212f5d6aca01..67946e2f50ea 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include "internal.h" @@ -729,7 +728,6 @@ static inline void queue_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { - const struct cred *cred; struct mm_struct *mm = tsk->mm; WARN_ON(oom_killer_disabled); @@ -751,9 +749,7 @@ static void mark_oom_victim(struct task_struct *tsk) */ __thaw_task(tsk); atomic_inc(&oom_victims); - cred = get_task_cred(tsk); - trace_mark_victim(tsk, cred->uid.val); - put_cred(cred); + trace_mark_victim(tsk->pid); } /** From 3507c287a681cab08d81e41df9618d36ed572118 Mon Sep 17 00:00:00 2001 From: Carlos Galo Date: Fri, 23 Feb 2024 17:32:49 +0000 Subject: [PATCH 42/98] UPSTREAM: mm: update mark_victim tracepoints fields 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of the mark_victim tracepoint provides only the process ID (pid) of the victim process. This limitation poses challenges for userspace tools requiring real-time OOM analysis and intervention. Although this information is available from the kernel logs, it’s not the appropriate format to provide OOM notifications. In Android, BPF programs are used with the mark_victim trace events to notify userspace of an OOM kill. For consistency, update the trace event to include the same information about the OOMed victim as the kernel logs. - UID In Android each installed application has a unique UID. Including the `uid` assists in correlating OOM events with specific apps. - Process Name (comm) Enables identification of the affected process. - OOM Score Will allow userspace to get additional insight of the relative kill priority of the OOM victim. In Android, the oom_score_adj is used to categorize app state (foreground, background, etc.), which aids in analyzing user-perceptible impacts of OOM events [1]. - Total VM, RSS Stats, and pgtables Amount of memory used by the victim that will, potentially, be freed up by killing it. 
[1] https://cs.android.com/android/platform/superproject/main/+/246dc8fc95b6d93afcba5c6d6c133307abb3ac2e:frameworks/base/services/core/java/com/android/server/am/ProcessList.java;l=188-283 Signed-off-by: Carlos Galo Reviewed-by: Steven Rostedt Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Bug: 331214192 (cherry picked from commit 72ba14deb40a9e9668ec5e66a341ed657e5215c2) Link: https://lore.kernel.org/all/20240223173258.174828-1-carlosgalo@google.com/ Change-Id: I24f503ceca04b83f8abf42fcd04a3409e17be6b5 --- include/trace/events/oom.h | 36 ++++++++++++++++++++++++++++++++---- mm/oom_kill.c | 6 +++++- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 26a11e4a2c36..b799f3bcba82 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -7,6 +7,8 @@ #include #include +#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10)) + TRACE_EVENT(oom_score_adj_update, TP_PROTO(struct task_struct *task), @@ -72,19 +74,45 @@ TRACE_EVENT(reclaim_retry_zone, ); TRACE_EVENT(mark_victim, - TP_PROTO(int pid), + TP_PROTO(struct task_struct *task, uid_t uid), - TP_ARGS(pid), + TP_ARGS(task, uid), TP_STRUCT__entry( __field(int, pid) + __string(comm, task->comm) + __field(unsigned long, total_vm) + __field(unsigned long, anon_rss) + __field(unsigned long, file_rss) + __field(unsigned long, shmem_rss) + __field(uid_t, uid) + __field(unsigned long, pgtables) + __field(short, oom_score_adj) ), TP_fast_assign( - __entry->pid = pid; + __entry->pid = task->pid; + __assign_str(comm, task->comm); + __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm); + __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES)); + __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES)); + __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES)); + __entry->uid = uid; + __entry->pgtables = 
mm_pgtables_bytes(task->mm) >> 10; + __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d", __entry->pid) + TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd", + __entry->pid, + __get_str(comm), + __entry->total_vm, + __entry->anon_rss, + __entry->file_rss, + __entry->shmem_rss, + __entry->uid, + __entry->pgtables, + __entry->oom_score_adj + ) ); TRACE_EVENT(wake_reaper, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 67946e2f50ea..212f5d6aca01 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "internal.h" @@ -728,6 +729,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { + const struct cred *cred; struct mm_struct *mm = tsk->mm; WARN_ON(oom_killer_disabled); @@ -749,7 +751,9 @@ static void mark_oom_victim(struct task_struct *tsk) */ __thaw_task(tsk); atomic_inc(&oom_victims); - trace_mark_victim(tsk->pid); + cred = get_task_cred(tsk); + trace_mark_victim(tsk, cred->uid.val); + put_cred(cred); } /** From 19cbe316423133e087dbe89b6d3fac2858d683e6 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:37 -0700 Subject: [PATCH 43/98] UPSTREAM: HID: playstation: expose DualSense lightbar through a multi-color LED. The DualSense lightbar has so far been supported, but it was not yet adjustable from user space. This patch exposes it through a multi-color LED. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit fc97b4d6a1a6d418fd4053fd7716eca746fdd163) Change-Id: I48204113da804b13ad5bed2f651a5826ab5a86f7 Signed-off-by: Farid Chahla (cherry picked from commit 392b327fe02113aaaa332ca4cf06e4edb36f5566) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 72 +++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 944e5e5ff134..ba502522479a 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -38,6 +40,7 @@ struct ps_device { uint8_t battery_capacity; int battery_status; + const char *input_dev_name; /* Name of primary input device. */ uint8_t mac_address[6]; /* Note: stored in little endian order. */ uint32_t hw_version; uint32_t fw_version; @@ -147,6 +150,7 @@ struct dualsense { uint8_t motor_right; /* RGB lightbar */ + struct led_classdev_mc lightbar; bool update_lightbar; uint8_t lightbar_red; uint8_t lightbar_green; @@ -288,6 +292,8 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = { {0, 0}, }; +static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue); + /* * Add a new ps_device to ps_devices if it doesn't exist. * Return error on duplicate device, which can happen if the same @@ -525,6 +531,45 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu return 0; } +/* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. 
*/ +static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev, + int (*brightness_set)(struct led_classdev *, enum led_brightness)) +{ + struct hid_device *hdev = ps_dev->hdev; + struct mc_subled *mc_led_info; + struct led_classdev *led_cdev; + int ret; + + mc_led_info = devm_kmalloc_array(&hdev->dev, 3, sizeof(*mc_led_info), + GFP_KERNEL | __GFP_ZERO); + if (!mc_led_info) + return -ENOMEM; + + mc_led_info[0].color_index = LED_COLOR_ID_RED; + mc_led_info[1].color_index = LED_COLOR_ID_GREEN; + mc_led_info[2].color_index = LED_COLOR_ID_BLUE; + + lightbar_mc_dev->subled_info = mc_led_info; + lightbar_mc_dev->num_colors = 3; + + led_cdev = &lightbar_mc_dev->led_cdev; + led_cdev->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s:rgb:indicator", + ps_dev->input_dev_name); + if (!led_cdev->name) + return -ENOMEM; + led_cdev->brightness = 255; + led_cdev->max_brightness = 255; + led_cdev->brightness_set_blocking = brightness_set; + + ret = devm_led_classdev_multicolor_register(&hdev->dev, lightbar_mc_dev); + if (ret < 0) { + hid_err(hdev, "Cannot register multicolor LED device\n"); + return ret; + } + + return 0; +} + static struct input_dev *ps_sensors_create(struct hid_device *hdev, int accel_range, int accel_res, int gyro_range, int gyro_res) { @@ -793,6 +838,22 @@ err_free: return ret; } +static int dualsense_lightbar_set_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(cdev); + struct dualsense *ds = container_of(mc_cdev, struct dualsense, lightbar); + uint8_t red, green, blue; + + led_mc_calc_color_components(mc_cdev, brightness); + red = mc_cdev->subled_info[0].brightness; + green = mc_cdev->subled_info[1].brightness; + blue = mc_cdev->subled_info[2].brightness; + + dualsense_set_lightbar(ds, red, green, blue); + return 0; +} + static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, void *buf) { @@ -1138,10 
+1199,14 @@ static int dualsense_reset_leds(struct dualsense *ds) static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue) { + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); ds->update_lightbar = true; ds->lightbar_red = red; ds->lightbar_green = green; ds->lightbar_blue = blue; + spin_unlock_irqrestore(&ds->base.lock, flags); schedule_work(&ds->output_worker); } @@ -1228,6 +1293,8 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) ret = PTR_ERR(ds->gamepad); goto err; } + /* Use gamepad input device name as primary device name for e.g. LEDs */ + ps_dev->input_dev_name = dev_name(&ds->gamepad->dev); ds->sensors = ps_sensors_create(hdev, DS_ACC_RANGE, DS_ACC_RES_PER_G, DS_GYRO_RANGE, DS_GYRO_RES_PER_DEG_S); @@ -1255,6 +1322,11 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) if (ret) goto err; + ret = ps_lightbar_register(ps_dev, &ds->lightbar, dualsense_lightbar_set_brightness); + if (ret) + goto err; + + /* Set default lightbar color. */ dualsense_set_lightbar(ds, 0, 0, 128); /* blue */ ret = ps_device_set_player_id(ps_dev); From f011142fea046cdc7fef755153acd6c2672e00ca Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:38 -0700 Subject: [PATCH 44/98] UPSTREAM: leds: add new LED_FUNCTION_PLAYER for player LEDs for game controllers. Player LEDs are commonly found on game controllers from Nintendo and Sony to indicate a player ID across a number of LEDs. For example, "Player 2" might be indicated as "-x--" on a device with 4 LEDs where "x" means on. This patch introduces LED_FUNCTION_PLAYER1-5 defines to properly indicate player LEDs from the kernel. Until now there was no good standard, which resulted in inconsistent behavior across xpad, hid-sony, hid-wiimote and other drivers. Moving forward new drivers should use LED_FUNCTION_PLAYERx. 
Note: management of Player IDs is left to user space, though a kernel driver may pick a default value. Signed-off-by: Roderick Colenbrander Acked-by: Pavel Machek Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 61177c088a57bed259122f3c7bc6d61984936a12) Change-Id: Ie1de4d66304bb25fc2c9fcdb1ec9b7589ad9e7ac Signed-off-by: Farid Chahla (cherry picked from commit 8abc9ed234b1b10e4949720e056c294dab4552d7) Signed-off-by: Lee Jones --- Documentation/leds/well-known-leds.txt | 14 ++++++++++++++ include/dt-bindings/leds/common.h | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt index 4a8b9dc4bf52..2160382c86be 100644 --- a/Documentation/leds/well-known-leds.txt +++ b/Documentation/leds/well-known-leds.txt @@ -16,6 +16,20 @@ but then try the legacy ones, too. Notice there's a list of functions in include/dt-bindings/leds/common.h . +* Gamepads and joysticks + +Game controllers may feature LEDs to indicate a player number. This is commonly +used on game consoles in which multiple controllers can be connected to a system. +The "player LEDs" are then programmed with a pattern to indicate a particular +player. For example, a game controller with 4 LEDs, may be programmed with "x---" +to indicate player 1, "-x--" to indicate player 2 etcetera where "x" means on. +Input drivers can utilize the LED class to expose the individual player LEDs +of a game controller using the function "player". +Note: tracking and management of Player IDs is the responsibility of user space, +though drivers may pick a default value. 
+ +Good: "input*:*:player-{1,2,3,4,5} + * Keyboards Good: "input*:*:capslock" diff --git a/include/dt-bindings/leds/common.h b/include/dt-bindings/leds/common.h index 52b619d44ba2..3be89a7c20a9 100644 --- a/include/dt-bindings/leds/common.h +++ b/include/dt-bindings/leds/common.h @@ -60,6 +60,13 @@ #define LED_FUNCTION_MICMUTE "micmute" #define LED_FUNCTION_MUTE "mute" +/* Used for player LEDs as found on game controllers from e.g. Nintendo, Sony. */ +#define LED_FUNCTION_PLAYER1 "player-1" +#define LED_FUNCTION_PLAYER2 "player-2" +#define LED_FUNCTION_PLAYER3 "player-3" +#define LED_FUNCTION_PLAYER4 "player-4" +#define LED_FUNCTION_PLAYER5 "player-5" + /* Miscelleaus functions. Use functions above if you can. */ #define LED_FUNCTION_ACTIVITY "activity" #define LED_FUNCTION_ALARM "alarm" From c996cb50e262d2bf558daef3774ab7bd9c80ba8e Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:39 -0700 Subject: [PATCH 45/98] UPSTREAM: HID: playstation: expose DualSense player LEDs through LED class. The DualSense player LEDs were so far not adjustable from user-space. This patch exposes each LED individually through the LED class. Each LED uses the new 'player' function resulting in a name like: 'inputX:white:player-1' for the first LED. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 8c0ab553b072025530308f74b2c0223ec50dffe5) Change-Id: I49c699a99b0b8a7bb7980560e3ea7a12faf646aa Signed-off-by: Farid Chahla (cherry picked from commit 1c2aceb8d7ca297ec5b485163361d40a93023347) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 85 ++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index ba502522479a..074e1a2f6fca 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -56,6 +56,13 @@ struct ps_calibration_data { int sens_denom; }; +struct ps_led_info { + const char *name; + const char *color; + enum led_brightness (*brightness_get)(struct led_classdev *cdev); + int (*brightness_set)(struct led_classdev *cdev, enum led_brightness); +}; + /* Seed values for DualShock4 / DualSense CRC32 for different report types. */ #define PS_INPUT_CRC32_SEED 0xA1 #define PS_OUTPUT_CRC32_SEED 0xA2 @@ -531,6 +538,32 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu return 0; } +static int ps_led_register(struct ps_device *ps_dev, struct led_classdev *led, + const struct ps_led_info *led_info) +{ + int ret; + + led->name = devm_kasprintf(&ps_dev->hdev->dev, GFP_KERNEL, + "%s:%s:%s", ps_dev->input_dev_name, led_info->color, led_info->name); + + if (!led->name) + return -ENOMEM; + + led->brightness = 0; + led->max_brightness = 1; + led->flags = LED_CORE_SUSPENDRESUME; + led->brightness_get = led_info->brightness_get; + led->brightness_set_blocking = led_info->brightness_set; + + ret = devm_led_classdev_register(&ps_dev->hdev->dev, led); + if (ret) { + hid_err(ps_dev->hdev, "Failed to register LED %s: %d\n", led_info->name, ret); + return ret; + } + + return 0; +} + /* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. 
*/ static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev, int (*brightness_set)(struct led_classdev *, enum led_brightness)) @@ -854,6 +887,35 @@ static int dualsense_lightbar_set_brightness(struct led_classdev *cdev, return 0; } +static enum led_brightness dualsense_player_led_get_brightness(struct led_classdev *led) +{ + struct hid_device *hdev = to_hid_device(led->dev->parent); + struct dualsense *ds = hid_get_drvdata(hdev); + + return !!(ds->player_leds_state & BIT(led - ds->player_leds)); +} + +static int dualsense_player_led_set_brightness(struct led_classdev *led, enum led_brightness value) +{ + struct hid_device *hdev = to_hid_device(led->dev->parent); + struct dualsense *ds = hid_get_drvdata(hdev); + unsigned long flags; + unsigned int led_index; + + spin_lock_irqsave(&ds->base.lock, flags); + + led_index = led - ds->player_leds; + if (value == LED_OFF) + ds->player_leds_state &= ~BIT(led_index); + else + ds->player_leds_state |= BIT(led_index); + + ds->update_player_leds = true; + spin_unlock_irqrestore(&ds->base.lock, flags); + + schedule_work(&ds->output_worker); +} + static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, void *buf) { @@ -1239,7 +1301,20 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) struct dualsense *ds; struct ps_device *ps_dev; uint8_t max_output_report_size; - int ret; + int i, ret; + + static const struct ps_led_info player_leds_info[] = { + { LED_FUNCTION_PLAYER1, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER2, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER3, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER4, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER5, "white", 
dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness } + }; ds = devm_kzalloc(&hdev->dev, sizeof(*ds), GFP_KERNEL); if (!ds) @@ -1329,6 +1404,14 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) /* Set default lightbar color. */ dualsense_set_lightbar(ds, 0, 0, 128); /* blue */ + for (i = 0; i < ARRAY_SIZE(player_leds_info); i++) { + const struct ps_led_info *led_info = &player_leds_info[i]; + + ret = ps_led_register(ps_dev, &ds->player_leds[i], led_info); + if (ret < 0) + goto err; + } + ret = ps_device_set_player_id(ps_dev); if (ret) { hid_err(hdev, "Failed to assign player id for DualSense: %d\n", ret); From adce8aae671e837b5ebcf5a5a431f3f00b19dfa1 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 27 Oct 2021 10:04:10 +0200 Subject: [PATCH 46/98] UPSTREAM: HID: playstation: fix return from dualsense_player_led_set_brightness() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit brightness_set_blocking() callback expects function returning int. 
This fixes the follwoing build failure: drivers/hid/hid-playstation.c: In function ‘dualsense_player_led_set_brightness’: drivers/hid/hid-playstation.c:885:1: error: no return statement in function returning non-void [-Werror=return-type] } ^ Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 3c92cb4cb60c71b574e47108ead8b6f0470850db) Change-Id: Id16b960826a26ac22c1a14572444f9af29689ed6 Signed-off-by: Farid Chahla (cherry picked from commit 4281e236100d7ca198bca4e0e7e74410dc3fe751) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 074e1a2f6fca..ba148aa2d151 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -914,6 +914,8 @@ static int dualsense_player_led_set_brightness(struct led_classdev *led, enum le spin_unlock_irqrestore(&ds->base.lock, flags); schedule_work(&ds->output_worker); + + return 0; } static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, From 62085a0e6d90c0fda226d2e1de331ad244acd7b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Aug 2022 13:30:52 +0200 Subject: [PATCH 47/98] UPSTREAM: HID: playstation: convert to use dev_groups There is no need for a driver to individually add/create device groups, the driver core will do it automatically for you. Convert the hid-playstation driver to use the dev_groups pointer instead of manually calling the driver core to create the group and have it be cleaned up later on by the devm core. 
Cc: Roderick Colenbrander Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit b4a9af9be628e4f9d09997e0bdef30f6718e88ec) Change-Id: I516a1b0ef7f4f8545e0c1b9485b49879dd7a3136 Signed-off-by: Farid Chahla (cherry picked from commit 2096eced42faf94979f530ddb99cf0cef601af46) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index ba148aa2d151..9959472e6db2 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -692,15 +692,12 @@ static ssize_t hardware_version_show(struct device *dev, static DEVICE_ATTR_RO(hardware_version); -static struct attribute *ps_device_attributes[] = { +static struct attribute *ps_device_attrs[] = { &dev_attr_firmware_version.attr, &dev_attr_hardware_version.attr, NULL }; - -static const struct attribute_group ps_device_attribute_group = { - .attrs = ps_device_attributes, -}; +ATTRIBUTE_GROUPS(ps_device); static int dualsense_get_calibration_data(struct dualsense *ds) { @@ -1481,12 +1478,6 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id) } } - ret = devm_device_add_group(&hdev->dev, &ps_device_attribute_group); - if (ret) { - hid_err(hdev, "Failed to register sysfs nodes.\n"); - goto err_close; - } - return ret; err_close: @@ -1522,6 +1513,9 @@ static struct hid_driver ps_driver = { .probe = ps_probe, .remove = ps_remove, .raw_event = ps_raw_event, + .driver = { + .dev_groups = ps_device_groups, + }, }; static int __init ps_init(void) From e3da19b2180f079866af1aeba5db23a5e73eef58 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:11 -0700 Subject: [PATCH 48/98] UPSTREAM: HID: playstation: stop DualSense output work on 
remove. Ensure we don't schedule any new output work on removal and wait for any existing work to complete. If we don't do this e.g. rumble work can get queued during deletion and we trigger a kernel crash. Signed-off-by: Roderick Colenbrander CC: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-2-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit 182934a1e93b17f4edf71f4fcc8d19b19a6fe67a) Change-Id: I40cadfde5765cdabf45def929860258d6019bf10 Signed-off-by: Farid Chahla (cherry picked from commit 72fd6526898fc536159dc2ee72f6aaff34183547) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 41 ++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 9959472e6db2..df9761764289 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -46,6 +46,7 @@ struct ps_device { uint32_t fw_version; int (*parse_report)(struct ps_device *dev, struct hid_report *report, u8 *data, int size); + void (*remove)(struct ps_device *dev); }; /* Calibration data for playstation motion sensors. */ @@ -174,6 +175,7 @@ struct dualsense { struct led_classdev player_leds[5]; struct work_struct output_worker; + bool output_worker_initialized; void *output_report_dmabuf; uint8_t output_seq; /* Sequence number for output report. 
*/ }; @@ -299,6 +301,7 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = { {0, 0}, }; +static inline void dualsense_schedule_work(struct dualsense *ds); static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue); /* @@ -821,6 +824,7 @@ err_free: return ret; } + static int dualsense_get_firmware_info(struct dualsense *ds) { uint8_t *buf; @@ -910,7 +914,7 @@ static int dualsense_player_led_set_brightness(struct led_classdev *led, enum le ds->update_player_leds = true; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); return 0; } @@ -954,6 +958,16 @@ static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_ } } +static inline void dualsense_schedule_work(struct dualsense *ds) +{ + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); + if (ds->output_worker_initialized) + schedule_work(&ds->output_worker); + spin_unlock_irqrestore(&ds->base.lock, flags); +} + /* * Helper function to send DualSense output reports. Applies a CRC at the end of a report * for Bluetooth reports. @@ -1114,7 +1128,7 @@ static int dualsense_parse_report(struct ps_device *ps_dev, struct hid_report *r spin_unlock_irqrestore(&ps_dev->lock, flags); /* Schedule updating of microphone state at hardware level. 
*/ - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } ds->last_btn_mic_state = btn_mic_state; @@ -1229,10 +1243,22 @@ static int dualsense_play_effect(struct input_dev *dev, void *data, struct ff_ef ds->motor_right = effect->u.rumble.weak_magnitude / 256; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); return 0; } +static void dualsense_remove(struct ps_device *ps_dev) +{ + struct dualsense *ds = container_of(ps_dev, struct dualsense, base); + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); + ds->output_worker_initialized = false; + spin_unlock_irqrestore(&ds->base.lock, flags); + + cancel_work_sync(&ds->output_worker); +} + static int dualsense_reset_leds(struct dualsense *ds) { struct dualsense_output_report report; @@ -1269,7 +1295,7 @@ static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t gr ds->lightbar_blue = blue; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } static void dualsense_set_player_leds(struct dualsense *ds) @@ -1292,7 +1318,7 @@ static void dualsense_set_player_leds(struct dualsense *ds) ds->update_player_leds = true; ds->player_leds_state = player_ids[player_id]; - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } static struct ps_device *dualsense_create(struct hid_device *hdev) @@ -1331,7 +1357,9 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) ps_dev->battery_capacity = 100; /* initial value until parse_report. 
*/ ps_dev->battery_status = POWER_SUPPLY_STATUS_UNKNOWN; ps_dev->parse_report = dualsense_parse_report; + ps_dev->remove = dualsense_remove; INIT_WORK(&ds->output_worker, dualsense_output_worker); + ds->output_worker_initialized = true; hid_set_drvdata(hdev, ds); max_output_report_size = sizeof(struct dualsense_output_report_bt); @@ -1494,6 +1522,9 @@ static void ps_remove(struct hid_device *hdev) ps_devices_list_remove(dev); ps_device_release_player_id(dev); + if (dev->remove) + dev->remove(dev); + hid_hw_close(hdev); hid_hw_stop(hdev); } From 0cf6fdfb0a991c8d3bdc474434f44e86d2934fef Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:13 -0700 Subject: [PATCH 49/98] UPSTREAM: HID: playstation: support updated DualSense rumble mode. Newer DualSense firmware supports a revised classic rumble mode, which feels more similar to rumble as supported on previous PlayStation controllers. It has been made the default on PlayStation and non-PlayStation devices now (e.g. iOS and Windows). Default to this new mode when supported. Signed-off-by: Roderick Colenbrander Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-4-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit 9fecab247ed15e6145c126fc56ee1e89860741a7) Change-Id: Icd330111a4d1b1e76a04cd11c623d0982ce3d66f Signed-off-by: Farid Chahla (cherry picked from commit cf8edf192858c5997cae10fa2c028ee9e2a9db6b) Signed-off-by: Lee Jones --- drivers/hid/hid-playstation.c | 37 ++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index df9761764289..2228f6e4ba23 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -108,6 +108,9 @@ struct ps_led_info { #define DS_STATUS_CHARGING GENMASK(7, 4) #define DS_STATUS_CHARGING_SHIFT 4 +/* Feature version from DualSense Firmware Info report. 
*/ +#define DS_FEATURE_VERSION(major, minor) ((major & 0xff) << 8 | (minor & 0xff)) + /* * Status of a DualSense touch point contact. * Contact IDs, with highest bit set are 'inactive' @@ -126,6 +129,7 @@ struct ps_led_info { #define DS_OUTPUT_VALID_FLAG1_RELEASE_LEDS BIT(3) #define DS_OUTPUT_VALID_FLAG1_PLAYER_INDICATOR_CONTROL_ENABLE BIT(4) #define DS_OUTPUT_VALID_FLAG2_LIGHTBAR_SETUP_CONTROL_ENABLE BIT(1) +#define DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2 BIT(2) #define DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE BIT(4) #define DS_OUTPUT_LIGHTBAR_SETUP_LIGHT_OUT BIT(1) @@ -143,6 +147,9 @@ struct dualsense { struct input_dev *sensors; struct input_dev *touchpad; + /* Update version is used as a feature/capability version. */ + uint16_t update_version; + /* Calibration data for accelerometer and gyroscope. */ struct ps_calibration_data accel_calib_data[3]; struct ps_calibration_data gyro_calib_data[3]; @@ -153,6 +160,7 @@ struct dualsense { uint32_t sensor_timestamp_us; /* Compatible rumble state */ + bool use_vibration_v2; bool update_rumble; uint8_t motor_left; uint8_t motor_right; @@ -844,6 +852,15 @@ static int dualsense_get_firmware_info(struct dualsense *ds) ds->base.hw_version = get_unaligned_le32(&buf[24]); ds->base.fw_version = get_unaligned_le32(&buf[28]); + /* Update version is some kind of feature version. It is distinct from + * the firmware version as there can be many different variations of a + * controller over time with the same physical shell, but with different + * PCBs and other internal changes. The update version (internal name) is + * used as a means to detect what features are available and change behavior. + * Note: the version is different between DualSense and DualSense Edge. + */ + ds->update_version = get_unaligned_le16(&buf[44]); + err_free: kfree(buf); return ret; @@ -1006,7 +1023,10 @@ static void dualsense_output_worker(struct work_struct *work) if (ds->update_rumble) { /* Select classic rumble style haptics and enable it. 
*/ common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_HAPTICS_SELECT; - common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION; + if (ds->use_vibration_v2) + common->valid_flag2 |= DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2; + else + common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION; common->motor_left = ds->motor_left; common->motor_right = ds->motor_right; ds->update_rumble = false; @@ -1380,6 +1400,21 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) return ERR_PTR(ret); } + /* Original DualSense firmware simulated classic controller rumble through + * its new haptics hardware. It felt different from classic rumble users + * were used to. Since then new firmwares were introduced to change behavior + * and make this new 'v2' behavior default on PlayStation and other platforms. + * The original DualSense requires a new enough firmware as bundled with PS5 + * software released in 2021. DualSense edge supports it out of the box. + * Both devices also support the old mode, but it is not really used. + */ + if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) { + /* Feature version 2.21 introduced new vibration method. */ + ds->use_vibration_v2 = ds->update_version >= DS_FEATURE_VERSION(2, 21); + } else if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) { + ds->use_vibration_v2 = true; + } + ret = ps_devices_list_add(ps_dev); if (ret) return ERR_PTR(ret); From a5d03f57d6c26bc7b5f23c264fd26f43b6016332 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Jan 2024 10:56:26 +0100 Subject: [PATCH 50/98] UPSTREAM: netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain commit 01acb2e8666a6529697141a6017edbf206921913 upstream. Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER event is reported, otherwise a stale reference to netdevice remains in the hook list. 
Bug: 332803585 Fixes: 60a3815da702 ("netfilter: add inet ingress support") Cc: stable@vger.kernel.org Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 70f17b48c86622217a58d5099d29242fc9adac58) Signed-off-by: Lee Jones Change-Id: I28482dca416b61dcf2e722ba0aef62d2d41a8f23 --- net/netfilter/nft_chain_filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 5b02408a920b..35aa4ea94205 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -355,9 +355,10 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_table *table; struct nft_chain *chain, *nr; + struct nft_table *table; struct nft_ctx ctx = { .net = dev_net(dev), }; @@ -369,7 +370,8 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { - if (table->family != NFPROTO_NETDEV) + if (table->family != NFPROTO_NETDEV && + table->family != NFPROTO_INET) continue; ctx.family = table->family; @@ -378,6 +380,11 @@ static int nf_tables_netdev_event(struct notifier_block *this, if (!nft_is_base_chain(chain)) continue; + basechain = nft_base_chain(chain); + if (table->family == NFPROTO_INET && + basechain->ops.hooknum != NF_INET_INGRESS) + continue; + ctx.chain = chain; nft_netdev_event(event, dev, &ctx); } From f395ea0980ef1c07569d532a84e5f6d13ebc19bf Mon Sep 17 00:00:00 2001 From: "yenchia.chen" Date: Mon, 8 Apr 2024 20:21:17 +0800 Subject: [PATCH 51/98] ANDROID: GKI: update mtktv symbol 8 function symbol(s) added 'int tty_termios_hw_change(const struct ktermios*, const struct ktermios*)' 'void usb_serial_deregister_drivers(struct 
usb_serial_driver* const*)' 'void usb_serial_generic_close(struct usb_serial_port*)' 'int usb_serial_generic_get_icount(struct tty_struct*, struct serial_icounter_struct*)' 'int usb_serial_generic_open(struct tty_struct*, struct usb_serial_port*)' 'void usb_serial_generic_throttle(struct tty_struct*)' 'void usb_serial_generic_unthrottle(struct tty_struct*)' 'int usb_serial_register_drivers(struct usb_serial_driver* const*, const char*, const struct usb_device_id*)' Bug: 333350374 Change-Id: Ie1ea35a1c6795adef7d5fd65f9fc29f855d683bb Signed-off-by: yenchia.chen --- android/abi_gki_aarch64.stg | 1159 +++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_mtktv | 37 ++ 2 files changed, 1196 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 72278aa6623b..a1c1af173972 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -3178,6 +3178,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x1b445821 } +pointer_reference { + id: 0x0c436bab + kind: POINTER + pointee_type_id: 0x1b4d4832 +} pointer_reference { id: 0x0c43d124 kind: POINTER @@ -3423,6 +3428,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x1b939067 } +pointer_reference { + id: 0x0c780bd8 + kind: POINTER + pointee_type_id: 0x1ba0c9ff +} pointer_reference { id: 0x0c786e08 kind: POINTER @@ -3518,6 +3528,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x181b0acd } +pointer_reference { + id: 0x0c975eef + kind: POINTER + pointee_type_id: 0x181d9d22 +} pointer_reference { id: 0x0c97f018 kind: POINTER @@ -3558,6 +3573,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x18d41dd9 } +pointer_reference { + id: 0x0ca5cbd8 + kind: POINTER + pointee_type_id: 0x18d7c9fc +} pointer_reference { id: 0x0ca62e19 kind: POINTER @@ -3818,6 +3838,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x19fec76b } +pointer_reference { + id: 0x0cf07e3c + kind: POINTER + pointee_type_id: 0x19811e6e +} pointer_reference { id: 0x0cf0f1be kind: 
POINTER @@ -6573,6 +6598,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x1738d1f7 } +pointer_reference { + id: 0x0f5e5d84 + kind: POINTER + pointee_type_id: 0x1739908d +} pointer_reference { id: 0x0f5f18a4 kind: POINTER @@ -10058,6 +10088,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x50573700 } +pointer_reference { + id: 0x1e870478 + kind: POINTER + pointee_type_id: 0x505cf77f +} pointer_reference { id: 0x1e881fcb kind: POINTER @@ -10758,6 +10793,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xa2172324 } +pointer_reference { + id: 0x221732c5 + kind: POINTER + pointee_type_id: 0xa21c2d88 +} pointer_reference { id: 0x22198273 kind: POINTER @@ -12703,6 +12743,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9a20634b } +pointer_reference { + id: 0x2c18712b + kind: POINTER + pointee_type_id: 0x9a212231 +} pointer_reference { id: 0x2c18d6ee kind: POINTER @@ -12758,6 +12803,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9a391cba } +pointer_reference { + id: 0x2c207521 + kind: POINTER + pointee_type_id: 0x9ac13218 +} pointer_reference { id: 0x2c209d56 kind: POINTER @@ -12808,6 +12858,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9ae52b81 } +pointer_reference { + id: 0x2c2aca57 + kind: POINTER + pointee_type_id: 0x9aebcfc2 +} pointer_reference { id: 0x2c2bf57a kind: POINTER @@ -14828,6 +14883,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x99e0f6e0 } +pointer_reference { + id: 0x2ce866aa + kind: POINTER + pointee_type_id: 0x99e17c37 +} pointer_reference { id: 0x2ce9f40c kind: POINTER @@ -19413,6 +19473,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x96ef964a } +pointer_reference { + id: 0x2f2c9c0e + kind: POINTER + pointee_type_id: 0x96f296a4 +} pointer_reference { id: 0x2f30a05a kind: POINTER @@ -19488,6 +19553,16 @@ pointer_reference { kind: POINTER pointee_type_id: 0x96b5469b } +pointer_reference { + id: 0x2f3dfe58 + kind: POINTER + pointee_type_id: 0x96b71ffd +} 
+pointer_reference { + id: 0x2f3e1dbd + kind: POINTER + pointee_type_id: 0x96b8906b +} pointer_reference { id: 0x2f3e5017 kind: POINTER @@ -20198,6 +20273,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9559376c } +pointer_reference { + id: 0x2fc68c73 + kind: POINTER + pointee_type_id: 0x955ad750 +} pointer_reference { id: 0x2fc7c937 kind: POINTER @@ -20233,6 +20313,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x9502dd09 } +pointer_reference { + id: 0x2fd17240 + kind: POINTER + pointee_type_id: 0x95052f9e +} pointer_reference { id: 0x2fd46ff4 kind: POINTER @@ -20573,6 +20658,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xeb0f6de6 } +pointer_reference { + id: 0x3054f2d7 + kind: POINTER + pointee_type_id: 0xeb132dc2 +} pointer_reference { id: 0x3058262d kind: POINTER @@ -21468,6 +21558,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xe095cad8 } +pointer_reference { + id: 0x32bb7cf5 + kind: POINTER + pointee_type_id: 0xe0ad154a +} pointer_reference { id: 0x32bd639f kind: POINTER @@ -22288,6 +22383,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xfba05a49 } +pointer_reference { + id: 0x347de8b1 + kind: POINTER + pointee_type_id: 0xfbb74458 +} pointer_reference { id: 0x3481766c kind: POINTER @@ -23403,6 +23503,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xf620b983 } +pointer_reference { + id: 0x37185c4a + kind: POINTER + pointee_type_id: 0xf62197b5 +} pointer_reference { id: 0x3719a0ef kind: POINTER @@ -26408,6 +26513,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0xd1740ca6 } +pointer_reference { + id: 0x3ed39c44 + kind: POINTER + pointee_type_id: 0xd10e978e +} pointer_reference { id: 0x3ed52bb5 kind: POINTER @@ -29583,6 +29693,11 @@ qualified { qualifier: CONST qualified_type_id: 0x33341885 } +qualified { + id: 0xd10e978e + qualifier: CONST + qualified_type_id: 0x347de8b1 +} qualified { id: 0xd12be7af qualifier: CONST @@ -30913,6 +31028,11 @@ qualified { qualifier: CONST qualified_type_id: 
0xdbbcb810 } +qualified { + id: 0xeb132dc2 + qualifier: CONST + qualified_type_id: 0xdc0b0182 +} qualified { id: 0xeb2226c1 qualifier: CONST @@ -32681,6 +32801,11 @@ array { number_of_elements: 16 element_type_id: 0x21069feb } +array { + id: 0x44b60e20 + number_of_elements: 16 + element_type_id: 0x221732c5 +} array { id: 0x44b8b776 number_of_elements: 2 @@ -32816,6 +32941,11 @@ array { number_of_elements: 16 element_type_id: 0x094c30e0 } +array { + id: 0x4f77c35e + number_of_elements: 16 + element_type_id: 0x0d10073d +} array { id: 0x4f935f0c number_of_elements: 32 @@ -33251,6 +33381,11 @@ array { number_of_elements: 2 element_type_id: 0x1e5f9cbf } +array { + id: 0x6bd55bd0 + number_of_elements: 2 + element_type_id: 0x1df06cce +} array { id: 0x6bdac314 number_of_elements: 2 @@ -33266,6 +33401,11 @@ array { number_of_elements: 2 element_type_id: 0x007e8ce4 } +array { + id: 0x6ce54884 + number_of_elements: 2 + element_type_id: 0x0130219f +} array { id: 0x6d099744 number_of_elements: 2 @@ -45511,6 +45651,12 @@ member { type_id: 0x2de1dbe2 offset: 64 } +member { + id: 0x9619b60e + name: "attach" + type_id: 0x2c18712b + offset: 2176 +} member { id: 0x961a0bf7 name: "attach" @@ -45678,6 +45824,13 @@ member { type_id: 0x6d7f5ff6 offset: 2016 } +member { + id: 0x57f5e518 + name: "attached" + type_id: 0x5d8155a5 + offset: 289 + bitsize: 1 +} member { id: 0x95a2ac4d name: "attached_dev" @@ -50186,6 +50339,12 @@ member { type_id: 0xc9082b19 offset: 576 } +member { + id: 0xfd552aa2 + name: "break_ctl" + type_id: 0x0c3ee516 + offset: 3264 +} member { id: 0xfd57c225 name: "break_ctl" @@ -51238,12 +51397,30 @@ member { offset: 354 bitsize: 1 } +member { + id: 0x98845503 + name: "bulk_in" + type_id: 0x4f77c35e + offset: 64 +} member { id: 0x98c50225 name: "bulk_in" type_id: 0x0e2680c2 offset: 1984 } +member { + id: 0x80e885f4 + name: "bulk_in_buffer" + type_id: 0x1df06cce + offset: 4032 +} +member { + id: 0xfd487f77 + name: "bulk_in_buffers" + type_id: 0x6bd55bd0 + offset: 4288 +} 
member { id: 0x0bed83d9 name: "bulk_in_enabled" @@ -51251,12 +51428,48 @@ member { offset: 1872 bitsize: 1 } +member { + id: 0x1926cd5d + name: "bulk_in_endpointAddress" + type_id: 0xb3e7bac9 + offset: 4224 +} +member { + id: 0x7e50fd73 + name: "bulk_in_size" + type_id: 0x6720d32f + offset: 4096 +} +member { + id: 0x7ec3e667 + name: "bulk_in_size" + type_id: 0xf435685e + offset: 1984 +} member { id: 0x223191aa name: "bulk_out" type_id: 0x0e2680c2 offset: 2048 } +member { + id: 0x2270ca1d + name: "bulk_out" + type_id: 0x4f77c35e + offset: 1088 +} +member { + id: 0x6fbe0621 + name: "bulk_out_buffer" + type_id: 0x1df06cce + offset: 4608 +} +member { + id: 0x2b1788a0 + name: "bulk_out_buffers" + type_id: 0x6bd55bd0 + offset: 4992 +} member { id: 0x1c3ca9e4 name: "bulk_out_enabled" @@ -51264,6 +51477,12 @@ member { offset: 1873 bitsize: 1 } +member { + id: 0xa1532b64 + name: "bulk_out_endpointAddress" + type_id: 0xb3e7bac9 + offset: 5312 +} member { id: 0xc866f0d4 name: "bulk_out_intended_length" @@ -51276,6 +51495,18 @@ member { type_id: 0x4585663f offset: 2880 } +member { + id: 0x6529283d + name: "bulk_out_size" + type_id: 0x6720d32f + offset: 4672 +} +member { + id: 0x65ba3b43 + name: "bulk_out_size" + type_id: 0xf435685e + offset: 2048 +} member { id: 0x460505fb name: "burst" @@ -52731,6 +52962,12 @@ member { type_id: 0x33756485 offset: 27200 } +member { + id: 0x7e1dc95a + name: "calc_num_ports" + type_id: 0x2c2aca57 + offset: 2240 +} member { id: 0x8d929646 name: "calc_sets" @@ -53777,6 +54014,12 @@ member { type_id: 0x4585663f offset: 128 } +member { + id: 0x59488b26 + name: "carrier_raised" + type_id: 0x2fd17240 + offset: 3968 +} member { id: 0x594b0e58 name: "carrier_raised" @@ -55074,6 +55317,12 @@ member { type_id: 0x0483e6f8 offset: 9664 } +member { + id: 0xd08d5830 + name: "chars_in_buffer" + type_id: 0x3bc90e1f + offset: 3328 +} member { id: 0xd08d5d4d name: "chars_in_buffer" @@ -56937,6 +57186,12 @@ member { type_id: 0x0d22c400 offset: 128 } +member { + 
id: 0xcd7f6f7d + name: "close" + type_id: 0x0c975eef + offset: 2816 +} member { id: 0xcd7fa645 name: "close" @@ -67666,6 +67921,12 @@ member { type_id: 0x23230326 offset: 320 } +member { + id: 0xce1ac7ea + name: "dev" + type_id: 0x23230326 + offset: 6272 +} member { id: 0xce1ac83c name: "dev" @@ -67798,6 +68059,11 @@ member { type_id: 0x0d7ce7cc offset: 64 } +member { + id: 0xce349ead + name: "dev" + type_id: 0x0d7ce7cc +} member { id: 0xce3785c0 name: "dev" @@ -70140,6 +70406,12 @@ member { type_id: 0x18bd6530 offset: 5568 } +member { + id: 0xdc0f3007 + name: "disc_mutex" + type_id: 0xa7c362b0 + offset: 1472 +} member { id: 0x8c3076ff name: "discard" @@ -70188,6 +70460,12 @@ member { type_id: 0x0f3dfb90 offset: 1408 } +member { + id: 0x8da4e999 + name: "disconnect" + type_id: 0x0f5e5d84 + offset: 2304 +} member { id: 0x8da522b5 name: "disconnect" @@ -70267,6 +70545,13 @@ member { type_id: 0x1f3c8679 offset: 1280 } +member { + id: 0xd1a76d30 + name: "disconnected" + type_id: 0x5d8155a5 + offset: 288 + bitsize: 1 +} member { id: 0x77b2de2b name: "discov_interleaved_timeout" @@ -72260,6 +72545,12 @@ member { type_id: 0x6e3b7d7f offset: 50752 } +member { + id: 0xee864cc4 + name: "driver_list" + type_id: 0xd3c80119 + offset: 128 +} member { id: 0xb3c8d0ce name: "driver_max_VFs" @@ -73056,6 +73347,12 @@ member { type_id: 0x0d14f575 offset: 512 } +member { + id: 0xc9730947 + name: "dtr_rts" + type_id: 0x0cf07e3c + offset: 3904 +} member { id: 0x8c9f51a9 name: "dual_link" @@ -73562,6 +73859,12 @@ member { type_id: 0x3077bd6f offset: 2368 } +member { + id: 0x1b86a2bd + name: "dynids" + type_id: 0x91a9600d + offset: 1728 +} member { id: 0x1b86a7a5 name: "dynids" @@ -82452,6 +82755,12 @@ member { type_id: 0x33756485 offset: 4032 } +member { + id: 0x2d5bf7e0 + name: "flags" + type_id: 0x33756485 + offset: 5760 +} member { id: 0x2d5bf7f4 name: "flags" @@ -87533,6 +87842,12 @@ member { name: "get_hwirq" type_id: 0x30934160 } +member { + id: 0x34f4a74e + name: "get_icount" + 
type_id: 0x2f1a6bce + offset: 3840 +} member { id: 0x34f4a83a name: "get_icount" @@ -88200,6 +88515,12 @@ member { type_id: 0x2f054704 offset: 1856 } +member { + id: 0x956eb42c + name: "get_serial" + type_id: 0x0c436bab + offset: 3072 +} member { id: 0x545ea760 name: "get_sg_table" @@ -94227,6 +94548,12 @@ member { type_id: 0x02900546 offset: 192 } +member { + id: 0x083df5c3 + name: "icount" + type_id: 0x63b8b563 + offset: 5344 +} member { id: 0x0843960a name: "icount" @@ -95217,6 +95544,12 @@ member { type_id: 0x3b2ca4e8 offset: 64 } +member { + id: 0xc4fbc795 + name: "id_table" + type_id: 0x38040a6c + offset: 64 +} member { id: 0xc4fbca06 name: "id_table" @@ -97844,6 +98177,12 @@ member { type_id: 0x3a47ea7a offset: 128 } +member { + id: 0xe7104099 + name: "init_termios" + type_id: 0x0c59c5c5 + offset: 4032 +} member { id: 0xe7c0151f name: "init_termios" @@ -98682,6 +99021,12 @@ member { type_id: 0x442bf459 offset: 768 } +member { + id: 0x1e592a48 + name: "interface" + type_id: 0x21069feb + offset: 128 +} member { id: 0x1e64a125 name: "interface" @@ -98859,6 +99204,60 @@ member { type_id: 0x6d7f5ff6 offset: 72 } +member { + id: 0xb25f3460 + name: "interrupt_in" + type_id: 0x4f77c35e + offset: 2112 +} +member { + id: 0xff6c376e + name: "interrupt_in_buffer" + type_id: 0x1df06cce + offset: 3584 +} +member { + id: 0x85b71e23 + name: "interrupt_in_endpointAddress" + type_id: 0xb3e7bac9 + offset: 3712 +} +member { + id: 0xa213a93e + name: "interrupt_in_urb" + type_id: 0x0130219f + offset: 3648 +} +member { + id: 0x88026c22 + name: "interrupt_out" + type_id: 0x4f77c35e + offset: 3136 +} +member { + id: 0xf35c5c33 + name: "interrupt_out_buffer" + type_id: 0x1df06cce + offset: 3776 +} +member { + id: 0xda583590 + name: "interrupt_out_endpointAddress" + type_id: 0xb3e7bac9 + offset: 3968 +} +member { + id: 0xa83090b1 + name: "interrupt_out_size" + type_id: 0x6720d32f + offset: 3840 +} +member { + id: 0x5e3b6037 + name: "interrupt_out_urb" + type_id: 0x0130219f + offset: 
3904 +} member { id: 0x05278e35 name: "interrupt_pin" @@ -99573,6 +99972,12 @@ member { type_id: 0x2e1b56db offset: 384 } +member { + id: 0x4d4b01d0 + name: "ioctl" + type_id: 0x2f595b5a + offset: 3008 +} member { id: 0x4d4b0f76 name: "ioctl" @@ -103539,6 +103944,12 @@ member { name: "kref" type_id: 0x6f1daf87 } +member { + id: 0x02ce5b75 + name: "kref" + type_id: 0x6f1daf87 + offset: 1408 +} member { id: 0x02ce5d67 name: "kref" @@ -107637,6 +108048,12 @@ member { type_id: 0xf313e71a offset: 896 } +member { + id: 0x2d1fe6cf + name: "lock" + type_id: 0xf313e71a + offset: 3456 +} member { id: 0x2d1fe798 name: "lock" @@ -115766,6 +116183,12 @@ member { type_id: 0xc9082b19 offset: 32 } +member { + id: 0xc8ee0756 + name: "minor" + type_id: 0xc9082b19 + offset: 3488 +} member { id: 0xc8ee0a2c name: "minor" @@ -115814,6 +116237,13 @@ member { type_id: 0x6720d32f offset: 64 } +member { + id: 0x4d99c9ee + name: "minors_reserved" + type_id: 0x5d8155a5 + offset: 290 + bitsize: 1 +} member { id: 0x096ecb61 name: "minutes" @@ -125598,6 +126028,41 @@ member { type_id: 0x4585663f offset: 5376 } +member { + id: 0x4cfd40f2 + name: "num_bulk_in" + type_id: 0x5d8155a5 + offset: 1928 +} +member { + id: 0x4cfd4941 + name: "num_bulk_in" + type_id: 0x5d8155a5 + offset: 328 +} +member { + id: 0x4cfd4a18 + name: "num_bulk_in" + type_id: 0x5d8155a5 +} +member { + id: 0xd78f59f9 + name: "num_bulk_out" + type_id: 0x5d8155a5 + offset: 1936 +} +member { + id: 0xd78f5a1c + name: "num_bulk_out" + type_id: 0x5d8155a5 + offset: 336 +} +member { + id: 0xd78f5f05 + name: "num_bulk_out" + type_id: 0x5d8155a5 + offset: 8 +} member { id: 0x32b760e4 name: "num_bus_formats" @@ -126049,6 +126514,42 @@ member { type_id: 0x6720d32f offset: 2048 } +member { + id: 0x1968f7ca + name: "num_interrupt_in" + type_id: 0x5d8155a5 + offset: 312 +} +member { + id: 0x1968f921 + name: "num_interrupt_in" + type_id: 0x5d8155a5 + offset: 16 +} +member { + id: 0x1968fceb + name: "num_interrupt_in" + type_id: 0x5d8155a5 + 
offset: 1944 +} +member { + id: 0xc578d426 + name: "num_interrupt_out" + type_id: 0x5d8155a5 + offset: 24 +} +member { + id: 0xc578d580 + name: "num_interrupt_out" + type_id: 0x5d8155a5 + offset: 320 +} +member { + id: 0xc578dab3 + name: "num_interrupt_out" + type_id: 0x5d8155a5 + offset: 1952 +} member { id: 0xdf32959c name: "num_ioctls" @@ -126346,12 +126847,30 @@ member { type_id: 0x4585663f offset: 58624 } +member { + id: 0x176ff071 + name: "num_port_pointers" + type_id: 0x5d8155a5 + offset: 304 +} member { id: 0x0f014be3 name: "num_ports" type_id: 0x4585663f offset: 64 } +member { + id: 0x0f194513 + name: "num_ports" + type_id: 0x5d8155a5 + offset: 296 +} +member { + id: 0x0f194aef + name: "num_ports" + type_id: 0x5d8155a5 + offset: 1920 +} member { id: 0x0f23ee6e name: "num_ports" @@ -128776,6 +129295,12 @@ member { type_id: 0x2f1fe96a offset: 128 } +member { + id: 0xad9bbf06 + name: "open" + type_id: 0x2f3dfe58 + offset: 2752 +} member { id: 0xadb0bada name: "open" @@ -130539,6 +131064,12 @@ member { type_id: 0xe62ebf07 offset: 256 } +member { + id: 0x455a2c83 + name: "overrun" + type_id: 0xe62ebf07 + offset: 256 +} member { id: 0x455a2f0c name: "overrun" @@ -132605,6 +133136,12 @@ member { type_id: 0xe62ebf07 offset: 256 } +member { + id: 0xd6e7fef7 + name: "parity" + type_id: 0xe62ebf07 + offset: 224 +} member { id: 0xc70bed2a name: "park" @@ -137396,6 +137933,12 @@ member { name: "port" type_id: 0x4201a01e } +member { + id: 0x48b91cec + name: "port" + type_id: 0x4201a01e + offset: 64 +} member { id: 0x48be982a name: "port" @@ -137407,6 +137950,12 @@ member { name: "port" type_id: 0x4585663f } +member { + id: 0x48bfae31 + name: "port" + type_id: 0x44b60e20 + offset: 384 +} member { id: 0x48cc7707 name: "port" @@ -137622,6 +138171,12 @@ member { type_id: 0x295c7202 offset: 3520 } +member { + id: 0xdc31667a + name: "port_number" + type_id: 0x295c7202 + offset: 3520 +} member { id: 0xdcd13b2b name: "port_number" @@ -137651,12 +138206,24 @@ member { type_id: 
0x0baa70a7 offset: 8224 } +member { + id: 0x66c5c2ea + name: "port_probe" + type_id: 0x2fd17240 + offset: 2432 +} member { id: 0x687fd28c name: "port_remote_wakeup" type_id: 0xc9082b19 offset: 192 } +member { + id: 0xe4114db1 + name: "port_remove" + type_id: 0x0c975eef + offset: 2496 +} member { id: 0xa22ff2af name: "port_split" @@ -139111,6 +139678,12 @@ member { type_id: 0x0f7ac5c1 offset: 128 } +member { + id: 0x934f43bd + name: "prepare_write_buffer" + type_id: 0x2fc68c73 + offset: 4416 +} member { id: 0x9ef237da name: "prepare_writeback_job" @@ -140111,6 +140684,12 @@ member { type_id: 0x18bd6530 offset: 7296 } +member { + id: 0x91796dae + name: "private" + type_id: 0x18bd6530 + offset: 1856 +} member { id: 0x91796e7a name: "private" @@ -140516,6 +141095,12 @@ member { type_id: 0x2c27cb1a offset: 64 } +member { + id: 0xd77a2d00 + name: "probe" + type_id: 0x2c207521 + offset: 2112 +} member { id: 0xd77a3f94 name: "probe" @@ -140947,6 +141532,12 @@ member { type_id: 0x578f9c2b offset: 128 } +member { + id: 0x320901bb + name: "process_read_urb" + type_id: 0x0ea52fda + offset: 4352 +} member { id: 0x3cce5fd6 name: "process_todo" @@ -145695,6 +146286,12 @@ member { type_id: 0x4585663f offset: 224 } +member { + id: 0xda4d0f3d + name: "read_bulk_callback" + type_id: 0x0ea52fda + offset: 4224 +} member { id: 0xc9397873 name: "read_bytes" @@ -145821,6 +146418,12 @@ member { type_id: 0x2e7062fc offset: 448 } +member { + id: 0xe2acc05f + name: "read_int_callback" + type_id: 0x0ea52fda + offset: 4096 +} member { id: 0xeb51ccd7 name: "read_iter" @@ -146021,6 +146624,24 @@ member { type_id: 0x1dcc0874 offset: 1088 } +member { + id: 0x20931d2e + name: "read_urb" + type_id: 0x0130219f + offset: 4160 +} +member { + id: 0xc6c744ad + name: "read_urbs" + type_id: 0x6ce54884 + offset: 4416 +} +member { + id: 0x599921b1 + name: "read_urbs_free" + type_id: 0x33756485 + offset: 4544 +} member { id: 0x1cafcc11 name: "read_w" @@ -148207,6 +148828,12 @@ member { type_id: 0x0f626ee5 
offset: 1152 } +member { + id: 0xae97a5b3 + name: "release" + type_id: 0x0f5e5d84 + offset: 2368 +} member { id: 0xae97f307 name: "release" @@ -150801,6 +151428,12 @@ member { type_id: 0x2fe06892 offset: 384 } +member { + id: 0xa792c035 + name: "reset_resume" + type_id: 0x2c18712b + offset: 2688 +} member { id: 0xa793f54a name: "reset_resume" @@ -151410,6 +152043,12 @@ member { type_id: 0x2cee6908 offset: 448 } +member { + id: 0xcab3f147 + name: "resume" + type_id: 0x2c18712b + offset: 2624 +} member { id: 0xa4d8edf6 name: "resume_done" @@ -153933,6 +154572,12 @@ member { type_id: 0xe62ebf07 offset: 128 } +member { + id: 0x6c1664a5 + name: "rx" + type_id: 0xe62ebf07 + offset: 160 +} member { id: 0x6c511725 name: "rx" @@ -158526,6 +159171,11 @@ member { type_id: 0x0483e6f8 offset: 11392 } +member { + id: 0xa79bc81d + name: "serial" + type_id: 0x1e870478 +} member { id: 0xa7a9403c name: "serial" @@ -159734,6 +160384,12 @@ member { type_id: 0x2f054704 offset: 1920 } +member { + id: 0xd9943837 + name: "set_serial" + type_id: 0x2f054704 + offset: 3136 +} member { id: 0xdada940c name: "set_signals" @@ -159853,6 +160509,12 @@ member { type_id: 0x2de347ce offset: 384 } +member { + id: 0x7e9ca3e9 + name: "set_termios" + type_id: 0x0c780bd8 + offset: 3200 +} member { id: 0x7e9cb9f7 name: "set_termios" @@ -161511,6 +162173,12 @@ member { type_id: 0xd3c80119 offset: 13184 } +member { + id: 0xeec37d27 + name: "sibling" + type_id: 0x21069feb + offset: 192 +} member { id: 0xeed6076d name: "sibling" @@ -169671,6 +170339,12 @@ member { type_id: 0x2ce67932 offset: 576 } +member { + id: 0xf3963ffd + name: "suspend" + type_id: 0x2ce866aa + offset: 2560 +} member { id: 0xf396c9d5 name: "suspend" @@ -169781,6 +170455,12 @@ member { offset: 2498 bitsize: 1 } +member { + id: 0xdcd36a92 + name: "suspend_count" + type_id: 0x4585663f + offset: 256 +} member { id: 0xdce23cc4 name: "suspend_count" @@ -171634,6 +172314,12 @@ member { type_id: 0x33756485 offset: 2752 } +member { + id: 0x1e9bc448 
+ name: "sysrq" + type_id: 0x33756485 + offset: 6208 +} member { id: 0x4d8ad507 name: "sysrq_ch" @@ -173806,6 +174492,12 @@ member { type_id: 0xa7c362b0 offset: 9280 } +member { + id: 0xca51229f + name: "throttle" + type_id: 0x0c59c5c5 + offset: 3520 +} member { id: 0xca512364 name: "throttle" @@ -174777,6 +175469,18 @@ member { type_id: 0x2f1fe96a offset: 1600 } +member { + id: 0x7363590a + name: "tiocmget" + type_id: 0x2f1fe96a + offset: 3648 +} +member { + id: 0x6e7a79e5 + name: "tiocmiwait" + type_id: 0x2f2c9c0e + offset: 3776 +} member { id: 0x7e407b75 name: "tiocmset" @@ -174789,6 +175493,12 @@ member { type_id: 0x2f5e345a offset: 1664 } +member { + id: 0x7e4156be + name: "tiocmset" + type_id: 0x2f5e345a + offset: 3712 +} member { id: 0x073cf00c name: "tipc_ptr" @@ -177594,6 +178304,12 @@ member { type_id: 0xe62ebf07 offset: 160 } +member { + id: 0x653ea6fe + name: "tx" + type_id: 0xe62ebf07 + offset: 128 +} member { id: 0x6548ea05 name: "tx" @@ -177668,6 +178384,12 @@ member { type_id: 0x391f15ea offset: 128 } +member { + id: 0xb51b57a4 + name: "tx_bytes" + type_id: 0x6720d32f + offset: 5696 +} member { id: 0xb54f06ad name: "tx_bytes" @@ -177831,6 +178553,12 @@ member { type_id: 0xb02b353a offset: 3904 } +member { + id: 0x14573c12 + name: "tx_empty" + type_id: 0x37185c4a + offset: 3456 +} member { id: 0x1459039b name: "tx_empty" @@ -178801,6 +179529,12 @@ member { type_id: 0x37ce2c2c offset: 544 } +member { + id: 0x5c62ac77 + name: "type" + type_id: 0x347de8b1 + offset: 64 +} member { id: 0x5c62c9fc name: "type" @@ -181188,6 +181922,12 @@ member { type_id: 0x0c59c5c5 offset: 1024 } +member { + id: 0x80cd11b3 + name: "unthrottle" + type_id: 0x0c59c5c5 + offset: 3584 +} member { id: 0x80cff2d9 name: "unthrottle" @@ -182067,6 +182807,12 @@ member { type_id: 0x0258f96e offset: 7680 } +member { + id: 0xfc09f0db + name: "usb_driver" + type_id: 0x3c9a9fb2 + offset: 1664 +} member { id: 0x075c8af5 name: "usb_id" @@ -186849,6 +187595,12 @@ member { type_id: 
0x0c3ee516 offset: 1472 } +member { + id: 0x691df5b8 + name: "wait_until_sent" + type_id: 0x0ca5cbd8 + offset: 3392 +} member { id: 0x691e4bbe name: "wait_until_sent" @@ -188139,6 +188891,12 @@ member { type_id: 0x1f3c8679 offset: 832 } +member { + id: 0xd6e6643f + name: "work" + type_id: 0x1f3c8679 + offset: 5824 +} member { id: 0xd6e66625 name: "work" @@ -188790,6 +189548,12 @@ member { type_id: 0x2f830764 offset: 256 } +member { + id: 0x342fe464 + name: "write" + type_id: 0x2f3e1dbd + offset: 2880 +} member { id: 0x342ff252 name: "write" @@ -188856,6 +189620,12 @@ member { type_id: 0x1df06cce offset: 5952 } +member { + id: 0x68c4d9d3 + name: "write_bulk_callback" + type_id: 0x0ea52fda + offset: 4288 +} member { id: 0xfeec54df name: "write_busy" @@ -188933,6 +189703,12 @@ member { type_id: 0x2c45ef00 offset: 704 } +member { + id: 0xa2006573 + name: "write_fifo" + type_id: 0x18745118 + offset: 4800 +} member { id: 0x2ec77a1f name: "write_file_info" @@ -188999,6 +189775,12 @@ member { type_id: 0x2c685816 offset: 256 } +member { + id: 0x84573c9c + name: "write_int_callback" + type_id: 0x0ea52fda + offset: 4160 +} member { id: 0xa854c899 name: "write_iter" @@ -189097,6 +189879,12 @@ member { type_id: 0x0f0e8ef4 offset: 1408 } +member { + id: 0xb08e3438 + name: "write_room" + type_id: 0x3bc90e1f + offset: 2944 +} member { id: 0xb08e3b1a name: "write_room" @@ -189145,6 +189933,24 @@ member { type_id: 0x2f53e65e offset: 1472 } +member { + id: 0xd4bce130 + name: "write_urb" + type_id: 0x0130219f + offset: 4736 +} +member { + id: 0x47ac84ab + name: "write_urbs" + type_id: 0x6ce54884 + offset: 5120 +} +member { + id: 0x34019984 + name: "write_urbs_free" + type_id: 0x33756485 + offset: 5248 +} member { id: 0x10a6114d name: "write_w" @@ -197737,6 +198543,25 @@ struct_union { member_id: 0x9d3e49d9 } } +struct_union { + id: 0x63b8b563 + kind: STRUCT + name: "async_icount" + definition { + bytesize: 44 + member_id: 0x273f08da + member_id: 0x3992ebe5 + member_id: 0xdd584c28 + 
member_id: 0x510e9896 + member_id: 0x653ea6fe + member_id: 0x6c1664a5 + member_id: 0x1691771b + member_id: 0xd6e7fef7 + member_id: 0x455a2c83 + member_id: 0x56c792be + member_id: 0x408101ca + } +} struct_union { id: 0x3cd7d077 kind: STRUCT @@ -241481,6 +242306,147 @@ struct_union { member_id: 0x0de577f0 } } +struct_union { + id: 0x505cf77f + kind: STRUCT + name: "usb_serial" + definition { + bytesize: 240 + member_id: 0xce349ead + member_id: 0x5c62ac77 + member_id: 0x1e592a48 + member_id: 0xeec37d27 + member_id: 0xdcd36a92 + member_id: 0xd1a76d30 + member_id: 0x57f5e518 + member_id: 0x4d99c9ee + member_id: 0x0f194513 + member_id: 0x176ff071 + member_id: 0x1968f7ca + member_id: 0xc578d580 + member_id: 0x4cfd4941 + member_id: 0xd78f5a1c + member_id: 0x48bfae31 + member_id: 0x02ce5b75 + member_id: 0xdc0f3007 + member_id: 0x91796dae + } +} +struct_union { + id: 0xfbb74458 + kind: STRUCT + name: "usb_serial_driver" + definition { + bytesize: 560 + member_id: 0x3144f518 + member_id: 0xc4fbc795 + member_id: 0xee864cc4 + member_id: 0xd4ad4cc3 + member_id: 0xfc09f0db + member_id: 0x1b86a2bd + member_id: 0x0f194aef + member_id: 0x4cfd40f2 + member_id: 0xd78f59f9 + member_id: 0x1968fceb + member_id: 0xc578dab3 + member_id: 0x7ec3e667 + member_id: 0x65ba3b43 + member_id: 0xd77a2d00 + member_id: 0x9619b60e + member_id: 0x7e1dc95a + member_id: 0x8da4e999 + member_id: 0xae97a5b3 + member_id: 0x66c5c2ea + member_id: 0xe4114db1 + member_id: 0xf3963ffd + member_id: 0xcab3f147 + member_id: 0xa792c035 + member_id: 0xad9bbf06 + member_id: 0xcd7f6f7d + member_id: 0x342fe464 + member_id: 0xb08e3438 + member_id: 0x4d4b01d0 + member_id: 0x956eb42c + member_id: 0xd9943837 + member_id: 0x7e9ca3e9 + member_id: 0xfd552aa2 + member_id: 0xd08d5830 + member_id: 0x691df5b8 + member_id: 0x14573c12 + member_id: 0xca51229f + member_id: 0x80cd11b3 + member_id: 0x7363590a + member_id: 0x7e4156be + member_id: 0x6e7a79e5 + member_id: 0x34f4a74e + member_id: 0xc9730947 + member_id: 0x59488b26 + member_id: 
0xe7104099 + member_id: 0xe2acc05f + member_id: 0x84573c9c + member_id: 0xda4d0f3d + member_id: 0x68c4d9d3 + member_id: 0x320901bb + member_id: 0x934f43bd + } +} +struct_union { + id: 0xe0ad154a + kind: STRUCT + name: "usb_serial_endpoints" + definition { + bytesize: 520 + member_id: 0x4cfd4a18 + member_id: 0xd78f5f05 + member_id: 0x1968f921 + member_id: 0xc578d426 + member_id: 0x98845503 + member_id: 0x2270ca1d + member_id: 0xb25f3460 + member_id: 0x88026c22 + } +} +struct_union { + id: 0xa21c2d88 + kind: STRUCT + name: "usb_serial_port" + definition { + bytesize: 1696 + member_id: 0xa79bc81d + member_id: 0x48b91cec + member_id: 0x2d1fe6cf + member_id: 0xc8ee0756 + member_id: 0xdc31667a + member_id: 0xff6c376e + member_id: 0xa213a93e + member_id: 0x85b71e23 + member_id: 0xf35c5c33 + member_id: 0xa83090b1 + member_id: 0x5e3b6037 + member_id: 0xda583590 + member_id: 0x80e885f4 + member_id: 0x7e50fd73 + member_id: 0x20931d2e + member_id: 0x1926cd5d + member_id: 0xfd487f77 + member_id: 0xc6c744ad + member_id: 0x599921b1 + member_id: 0x6fbe0621 + member_id: 0x6529283d + member_id: 0xd4bce130 + member_id: 0xa2006573 + member_id: 0x2b1788a0 + member_id: 0x47ac84ab + member_id: 0x34019984 + member_id: 0xa1532b64 + member_id: 0x083df5c3 + member_id: 0xb51b57a4 + member_id: 0x2d5bf7e0 + member_id: 0xd6e6643f + member_id: 0x1e9bc448 + member_id: 0xce1ac7ea + } +} struct_union { id: 0xabc64e21 kind: STRUCT @@ -267011,6 +267977,11 @@ function { return_type_id: 0x48b5725f parameter_id: 0x1e820193 } +function { + id: 0x1739908d + return_type_id: 0x48b5725f + parameter_id: 0x1e870478 +} function { id: 0x173c840d return_type_id: 0x48b5725f @@ -267472,6 +268443,11 @@ function { return_type_id: 0x48b5725f parameter_id: 0x2208f89a } +function { + id: 0x181d9d22 + return_type_id: 0x48b5725f + parameter_id: 0x221732c5 +} function { id: 0x181ece84 return_type_id: 0x48b5725f @@ -267800,6 +268776,12 @@ function { return_type_id: 0x48b5725f parameter_id: 0x2131312a } +function { + id: 
0x18d7c9fc + return_type_id: 0x48b5725f + parameter_id: 0x2efe8065 + parameter_id: 0xfc0e1dbd +} function { id: 0x18d85efa return_type_id: 0x48b5725f @@ -268083,6 +269065,12 @@ function { parameter_id: 0x26a80a21 parameter_id: 0x13f8b706 } +function { + id: 0x19811e6e + return_type_id: 0x48b5725f + parameter_id: 0x221732c5 + parameter_id: 0x6720d32f +} function { id: 0x19832066 return_type_id: 0x48b5725f @@ -268849,6 +269837,12 @@ function { parameter_id: 0x2cba2cd4 parameter_id: 0x3eacd4c8 } +function { + id: 0x1b4d4832 + return_type_id: 0x48b5725f + parameter_id: 0x2efe8065 + parameter_id: 0x1aae6e0f +} function { id: 0x1b4fa20e return_type_id: 0x48b5725f @@ -269020,6 +270014,13 @@ function { parameter_id: 0x18bd6530 parameter_id: 0xe02e14d6 } +function { + id: 0x1ba0c9ff + return_type_id: 0x48b5725f + parameter_id: 0x2efe8065 + parameter_id: 0x221732c5 + parameter_id: 0x3d92f9c7 +} function { id: 0x1ba0fc33 return_type_id: 0x48b5725f @@ -271629,6 +272630,11 @@ function { parameter_id: 0x3ee88c45 parameter_id: 0x0258f96e } +function { + id: 0x1f2cb682 + return_type_id: 0x48b5725f + parameter_id: 0x3ed39c44 +} function { id: 0x1f3094b2 return_type_id: 0x48b5725f @@ -279205,6 +280211,12 @@ function { parameter_id: 0x315b7e01 parameter_id: 0x21003da7 } +function { + id: 0x91548c51 + return_type_id: 0x6720d32f + parameter_id: 0x3054f2d7 + parameter_id: 0x3054f2d7 +} function { id: 0x9154ff7c return_type_id: 0x6720d32f @@ -281315,6 +282327,13 @@ function { parameter_id: 0x3eed77c0 parameter_id: 0x0258f96e } +function { + id: 0x92c247e8 + return_type_id: 0x6720d32f + parameter_id: 0x3ed39c44 + parameter_id: 0x3e10b518 + parameter_id: 0x38040a6c +} function { id: 0x92c286e9 return_type_id: 0x6720d32f @@ -283450,6 +284469,11 @@ function { return_type_id: 0x6720d32f parameter_id: 0x2208f89a } +function { + id: 0x95052f9e + return_type_id: 0x6720d32f + parameter_id: 0x221732c5 +} function { id: 0x950581be return_type_id: 0x6720d32f @@ -283682,6 +284706,13 @@ function { 
parameter_id: 0x6720d32f parameter_id: 0x6d7f5ff6 } +function { + id: 0x955ad750 + return_type_id: 0x6720d32f + parameter_id: 0x221732c5 + parameter_id: 0x18bd6530 + parameter_id: 0xf435685e +} function { id: 0x955f7e5a return_type_id: 0x6720d32f @@ -284531,6 +285562,20 @@ function { return_type_id: 0x6720d32f parameter_id: 0x2cd31328 } +function { + id: 0x96b71ffd + return_type_id: 0x6720d32f + parameter_id: 0x2efe8065 + parameter_id: 0x221732c5 +} +function { + id: 0x96b8906b + return_type_id: 0x6720d32f + parameter_id: 0x2efe8065 + parameter_id: 0x221732c5 + parameter_id: 0x384c5795 + parameter_id: 0x6720d32f +} function { id: 0x96b9a6c2 return_type_id: 0x6720d32f @@ -284797,6 +285842,12 @@ function { parameter_id: 0x054f691a parameter_id: 0x3fe8ca70 } +function { + id: 0x96f296a4 + return_type_id: 0x6720d32f + parameter_id: 0x2efe8065 + parameter_id: 0x33756485 +} function { id: 0x96fd9031 return_type_id: 0x6720d32f @@ -287414,6 +288465,12 @@ function { parameter_id: 0x92233392 parameter_id: 0x2e0f9112 } +function { + id: 0x99e17c37 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 + parameter_id: 0xf017819f +} function { id: 0x99e350e4 return_type_id: 0x6720d32f @@ -287700,6 +288757,11 @@ function { return_type_id: 0x6720d32f parameter_id: 0x1e820193 } +function { + id: 0x9a212231 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 +} function { id: 0x9a23bd25 return_type_id: 0xcc33625b @@ -288436,6 +289498,12 @@ function { parameter_id: 0x1c2f6323 parameter_id: 0x128c0fdb } +function { + id: 0x9ac13218 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 + parameter_id: 0x38040a6c +} function { id: 0x9ac293c4 return_type_id: 0x6720d32f @@ -288574,6 +289642,12 @@ function { parameter_id: 0x1a8d1bcb parameter_id: 0x716d7970 } +function { + id: 0x9aebcfc2 + return_type_id: 0x6720d32f + parameter_id: 0x1e870478 + parameter_id: 0x32bb7cf5 +} function { id: 0x9aef3374 return_type_id: 0x6720d32f @@ -301341,6 +302415,11 @@ function { parameter_id: 
0x38fdd541 parameter_id: 0x3c01aef6 } +function { + id: 0xf62197b5 + return_type_id: 0x6d7f5ff6 + parameter_id: 0x221732c5 +} function { id: 0xf6266522 return_type_id: 0x6d7f5ff6 @@ -355982,6 +357061,15 @@ elf_symbol { type_id: 0x1d825cc4 full_name: "tty_termios_encode_baud_rate" } +elf_symbol { + id: 0xacb09a3e + name: "tty_termios_hw_change" + is_defined: true + symbol_type: FUNCTION + crc: 0x6c257ac0 + type_id: 0x91548c51 + full_name: "tty_termios_hw_change" +} elf_symbol { id: 0x6eee841a name: "tty_unregister_device" @@ -358810,6 +359898,69 @@ elf_symbol { type_id: 0x1d8d80fc full_name: "usb_scuttle_anchored_urbs" } +elf_symbol { + id: 0x5fde6ab0 + name: "usb_serial_deregister_drivers" + is_defined: true + symbol_type: FUNCTION + crc: 0x6cfec83b + type_id: 0x1f2cb682 + full_name: "usb_serial_deregister_drivers" +} +elf_symbol { + id: 0xcb415220 + name: "usb_serial_generic_close" + is_defined: true + symbol_type: FUNCTION + crc: 0xf57aed21 + type_id: 0x181d9d22 + full_name: "usb_serial_generic_close" +} +elf_symbol { + id: 0x256f289d + name: "usb_serial_generic_get_icount" + is_defined: true + symbol_type: FUNCTION + crc: 0x7c014a1a + type_id: 0x962949a5 + full_name: "usb_serial_generic_get_icount" +} +elf_symbol { + id: 0xcc99e836 + name: "usb_serial_generic_open" + is_defined: true + symbol_type: FUNCTION + crc: 0x509aaf7d + type_id: 0x96b71ffd + full_name: "usb_serial_generic_open" +} +elf_symbol { + id: 0xbeec161b + name: "usb_serial_generic_throttle" + is_defined: true + symbol_type: FUNCTION + crc: 0x339df3ba + type_id: 0x1b27f18a + full_name: "usb_serial_generic_throttle" +} +elf_symbol { + id: 0x1e85ca64 + name: "usb_serial_generic_unthrottle" + is_defined: true + symbol_type: FUNCTION + crc: 0xf1a85827 + type_id: 0x1b27f18a + full_name: "usb_serial_generic_unthrottle" +} +elf_symbol { + id: 0xbc49d007 + name: "usb_serial_register_drivers" + is_defined: true + symbol_type: FUNCTION + crc: 0x2b562ef7 + type_id: 0x92c247e8 + full_name: 
"usb_serial_register_drivers" +} elf_symbol { id: 0x34af8a35 name: "usb_set_device_state" @@ -368433,6 +369584,7 @@ interface { symbol_id: 0x40ef0583 symbol_id: 0x66974d1b symbol_id: 0x4cc18d95 + symbol_id: 0xacb09a3e symbol_id: 0x6eee841a symbol_id: 0x0c2de3ab symbol_id: 0xaf7b86f3 @@ -368747,6 +369899,13 @@ interface { symbol_id: 0x2db97071 symbol_id: 0x88509066 symbol_id: 0x3f64aa24 + symbol_id: 0x5fde6ab0 + symbol_id: 0xcb415220 + symbol_id: 0x256f289d + symbol_id: 0xcc99e836 + symbol_id: 0xbeec161b + symbol_id: 0x1e85ca64 + symbol_id: 0xbc49d007 symbol_id: 0x34af8a35 symbol_id: 0x3cc50b4b symbol_id: 0x3e6b6dd2 diff --git a/android/abi_gki_aarch64_mtktv b/android/abi_gki_aarch64_mtktv index 20425705a20c..3b924f3c08cc 100644 --- a/android/abi_gki_aarch64_mtktv +++ b/android/abi_gki_aarch64_mtktv @@ -192,6 +192,7 @@ copy_page _copy_to_iter cpu_bit_bitmap + cpufreq_boost_enabled cpufreq_cpu_get_raw cpufreq_dbs_governor_exit cpufreq_dbs_governor_init @@ -199,6 +200,8 @@ cpufreq_dbs_governor_start cpufreq_dbs_governor_stop __cpufreq_driver_target + cpufreq_freq_attr_scaling_available_freqs + cpufreq_freq_attr_scaling_boost_freqs cpufreq_generic_attr cpufreq_generic_frequency_table_verify cpufreq_register_driver @@ -352,6 +355,7 @@ device_register device_remove_file device_rename + device_set_wakeup_capable device_set_wakeup_enable device_unregister _dev_info @@ -403,8 +407,10 @@ devm_phy_optional_get devm_pinctrl_get devm_pinctrl_put + devm_platform_ioremap_resource devm_pwm_get __devm_regmap_init_i2c + __devm_regmap_init_mmio_clk devm_regulator_bulk_get devm_regulator_get devm_regulator_register @@ -523,6 +529,7 @@ d_obtain_alias do_exit do_trace_netlink_extack + do_wait_intr down downgrade_write down_interruptible @@ -563,6 +570,7 @@ drm_atomic_helper_wait_for_fences drm_atomic_state_default_clear __drm_atomic_state_free + drm_bridge_add drm_compat_ioctl drm_connector_attach_encoder drm_connector_cleanup @@ -582,6 +590,11 @@ drm_dev_alloc drm_dev_register 
drm_display_mode_from_videomode + drm_dp_aux_init + drm_dp_channel_eq_ok + drm_dp_clock_recovery_ok + drm_dp_dpcd_read + drm_dp_dpcd_write drm_encoder_cleanup drm_encoder_init __drm_err @@ -994,11 +1007,14 @@ kmsg_dump_register kmsg_dump_rewind kmsg_dump_unregister + kobject_add kobject_create_and_add kobject_del + kobject_init kobject_init_and_add kobject_put kobject_uevent + kobj_sysfs_ops krealloc kstrdup kstrndup @@ -1013,13 +1029,18 @@ kstrtoul_from_user kstrtoull kthread_bind + kthread_cancel_work_sync kthread_create_on_node + kthread_flush_worker + __kthread_init_worker kthread_park kthread_parkme + kthread_queue_work kthread_should_park kthread_should_stop kthread_stop kthread_unpark + kthread_worker_fn ktime_get ktime_get_coarse_ts64 ktime_get_coarse_with_offset @@ -1208,6 +1229,7 @@ of_address_to_resource of_clk_add_provider of_clk_get + of_clk_get_by_name of_clk_get_from_provider of_clk_src_onecell_get of_count_phandle_with_args @@ -1382,6 +1404,7 @@ pm_runtime_force_resume pm_runtime_force_suspend __pm_runtime_idle + pm_runtime_no_callbacks __pm_runtime_resume pm_runtime_set_autosuspend_delay __pm_runtime_set_status @@ -1488,6 +1511,7 @@ __register_blkdev __register_chrdev register_chrdev_region + register_die_notifier register_filesystem register_inet6addr_notifier register_inetaddr_notifier @@ -1569,6 +1593,7 @@ rpmsg_create_ept rpmsg_find_device rpmsg_register_device + rpmsg_register_device_override rpmsg_release_channel rpmsg_send rpmsg_sendto @@ -1759,6 +1784,7 @@ snd_ctl_notify snd_devm_card_new snd_ecards_limit + snd_hwdep_new snd_info_get_line snd_interval_refine snd_pcm_format_big_endian @@ -1871,6 +1897,7 @@ strncat strncmp strncpy + strndup_user strnlen strnstr strpbrk @@ -2015,6 +2042,7 @@ tty_termios_baud_rate tty_termios_copy_hw tty_termios_encode_baud_rate + tty_termios_hw_change tty_unregister_device tty_unregister_driver tty_unregister_ldisc @@ -2041,6 +2069,7 @@ unregister_blkdev __unregister_chrdev unregister_chrdev_region + 
unregister_die_notifier unregister_filesystem unregister_inet6addr_notifier unregister_inetaddr_notifier @@ -2131,6 +2160,13 @@ usb_role_switch_get_drvdata usb_role_switch_register usb_role_switch_unregister + usb_serial_deregister_drivers + usb_serial_generic_close + usb_serial_generic_get_icount + usb_serial_generic_open + usb_serial_generic_throttle + usb_serial_generic_unthrottle + usb_serial_register_drivers usb_set_interface usb_show_dynids usb_speed_string @@ -2281,6 +2317,7 @@ wait_woken __wake_up wake_up_bit + __wake_up_locked wake_up_process wakeup_source_add wakeup_source_create From 6b883cdac21f8a6826557c45f703368652eaba15 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Mar 2024 14:22:12 +0100 Subject: [PATCH 52/98] UPSTREAM: netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout commit 552705a3650bbf46a22b1adedc1b04181490fc36 upstream. While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. 
Bug: 329205787 Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 406b0241d0eb598a0b330ab20ae325537d8d8163) Signed-off-by: Lee Jones Change-Id: I6170493c267e020c50a739150f8c421deb635b35 --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 520bd64144d6..2285548f0292 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5055,6 +5055,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); From ea419cda5cbbff97648e1434ef532f8d36879f30 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 28 Mar 2024 13:27:36 +0100 Subject: [PATCH 53/98] UPSTREAM: netfilter: nf_tables: release batch on table validation from abort path commit a45e6889575c2067d3c0212b6bc1022891e65b91 upstream. Unlike early commit path stage which triggers a call to abort, an explicit release of the batch is required on abort, otherwise mutex is released and commit_list remains in place. Add WARN_ON_ONCE to ensure commit_list is empty from the abort path before releasing the mutex. After this patch, commit_list is always assumed to be empty before grabbing the mutex, therefore 03c1f1ef1584 ("netfilter: Cleanup nft_net->module_list from nf_tables_exit_net()") only needs to release the pending modules for registration. 
Bug: 332996726 Cc: stable@vger.kernel.org Fixes: c0391b6ab810 ("netfilter: nf_tables: missing validation from the abort path") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b0b36dcbe0f24383612e5e62bd48df5a8107f7fc) Signed-off-by: Lee Jones Change-Id: I38f9b05ac4eadd1d2b7b306cccaf0aeacb61b57a --- net/netfilter/nf_tables_api.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2285548f0292..17c34afa6779 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9680,10 +9680,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + int err = 0; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) - return -EAGAIN; + err = -EAGAIN; list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { @@ -9859,7 +9860,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) else nf_tables_module_autoload_cleanup(net); - return 0; + return err; } static int nf_tables_abort(struct net *net, struct sk_buff *skb, @@ -9873,6 +9874,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, ret = __nf_tables_abort(net, action); nft_gc_seq_end(nft_net, gc_seq); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + mutex_unlock(&nft_net->commit_mutex); return ret; @@ -10674,9 +10677,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nft_net->commit_list) || - !list_empty(&nft_net->module_list)) - __nf_tables_abort(net, NFNL_ABORT_NONE); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); __nft_release_tables(net); From ceb8c595f8072bc2af914930ac659fb3c6f63e12 Mon Sep 17 00:00:00 2001 From: 
Pablo Neira Ayuso Date: Thu, 28 Mar 2024 14:23:55 +0100 Subject: [PATCH 54/98] UPSTREAM: netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path commit 0d459e2ffb541841714839e8228b845458ed3b27 upstream. The commit mutex should not be released during the critical section between nft_gc_seq_begin() and nft_gc_seq_end(), otherwise, async GC worker could collect expired objects and get the released commit lock within the same GC sequence. nf_tables_module_autoload() temporarily releases the mutex to load module dependencies, then it goes back to replay the transaction again. Move it at the end of the abort phase after nft_gc_seq_end() is called. Bug: 332996726 Cc: stable@vger.kernel.org Fixes: 720344340fb9 ("netfilter: nf_tables: GC transaction race with abort path") Reported-by: Kuan-Ting Chen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 8038ee3c3e5b59bcd78467686db5270c68544e30) Signed-off-by: Lee Jones Change-Id: I637389421d8eca5ab59a41bd1a4b70432440034c --- net/netfilter/nf_tables_api.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 17c34afa6779..165524d06995 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9855,11 +9855,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_abort_release(trans); } - if (action == NFNL_ABORT_AUTOLOAD) - nf_tables_module_autoload(net); - else - nf_tables_module_autoload_cleanup(net); - return err; } @@ -9876,6 +9871,14 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + /* module autoload needs to happen after GC sequence update because it + * temporarily releases and grabs mutex again. 
+ */ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); + else + nf_tables_module_autoload_cleanup(net); + mutex_unlock(&nft_net->commit_mutex); return ret; From 9274c308d8469e26a5f3fb8b598000edcff6b135 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Fri, 5 Apr 2024 11:00:40 -0700 Subject: [PATCH 55/98] ANDROID: 16K: Introduce /sys/kernel/mm/pgsize_miration/enabled Migrating from 4kB to 16kB page-size in Android requires first making the platform page-agnostic, which involves increasing Android-ELFs' max-page-size (p_align) from 4kB to 16kB. Increasing the ELF max-page-size was found to cause compatibility issues in apps that use obfuscation or depend on the ELF segments being mapped based on 4kB-alignment. Working around these compatibility issues involves both kernel and userspace (dynamic linker) changes. Introduce a knob for userspace (dynamic linker) to determine whether the kernel supports the mitigations needed for page-size migration compatibility. The knob also allows for userspace to turn on or off these mitigations by writing 1 or 0 to /sys/kernel/mm/pgsize_miration/enabled: echo 1 > /sys/kernel/mm//pgsize_miration/enabled # Enable echo 0 > /sys/kernel/mm//pgsize_miration/enabled # Disable Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I9ac1d15d397b8226b27827ecffa30502da91e10e Signed-off-by: Kalesh Singh --- mm/Makefile | 2 +- mm/pgsize_migration.c | 105 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 mm/pgsize_migration.c diff --git a/mm/Makefile b/mm/Makefile index 8a9954121e4d..a17ebb357dcb 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -52,7 +52,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ - debug.o gup.o mmap_lock.o $(mmu-y) + debug.o gup.o mmap_lock.o pgsize_migration.o $(mmu-y) # Give 'page_alloc' its own 
module-parameter namespace page-alloc-y := page_alloc.o diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c new file mode 100644 index 000000000000..e840cda99e22 --- /dev/null +++ b/mm/pgsize_migration.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Page Size Migration + * + * This file contains the core logic of mitigations to ensure + * app compatibility during the transition from 4kB to 16kB + * page size in Android. + * + * Copyright (c) 2024, Google LLC. + * Author: Kalesh Singh + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_64BIT +#if PAGE_SIZE == SZ_4K +DEFINE_STATIC_KEY_TRUE(pgsize_migration_enabled); + +#define is_pgsize_migration_enabled() (static_branch_likely(&pgsize_migration_enabled)) +#else /* PAGE_SIZE != SZ_4K */ +DEFINE_STATIC_KEY_FALSE(pgsize_migration_enabled); + +#define is_pgsize_migration_enabled() (static_branch_unlikely(&pgsize_migration_enabled)) +#endif /* PAGE_SIZE == SZ_4K */ + +static ssize_t show_pgsize_migration_enabled(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + if (is_pgsize_migration_enabled()) + return sprintf(buf, "%d\n", 1); + else + return sprintf(buf, "%d\n", 0); +} + +static ssize_t store_pgsize_migration_enabled(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + /* Migration is only applicable to 4kB kernels */ + if (PAGE_SIZE != SZ_4K) + return n; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + if (val > 1) + return -EINVAL; + + if (val == 1) + static_branch_enable(&pgsize_migration_enabled); + else if (val == 0) + static_branch_disable(&pgsize_migration_enabled); + + return n; +} + +static struct kobj_attribute pgsize_migration_enabled_attr = __ATTR( + enabled, + 0644, + show_pgsize_migration_enabled, + store_pgsize_migration_enabled +); + +static struct attribute *pgsize_migration_attrs[] = { + &pgsize_migration_enabled_attr.attr, + NULL +}; + +static struct 
attribute_group pgsize_migration_attr_group = { + .name = "pgsize_migration", + .attrs = pgsize_migration_attrs, +}; + +/** + * What: /sys/kernel/mm/pgsize_migration/enabled + * Date: April 2024 + * KernelVersion: v5.4+ (GKI kernels) + * Contact: Kalesh Singh + * Description: /sys/kernel/mm/pgsize_migration/enabled + * allows for userspace to turn on or off page size + * migration mitigations necessary for app compatibility + * during Android's transition from 4kB to 16kB page size. + * Such mitigations include preserving /proc//[s]maps + * output as if there was no segment extension by the + * dynamic loader; and preventing fault around in the padding + * sections of ELF LOAD segment mappings. + * Users: Bionic's dynamic linker + */ +static int __init init_pgsize_migration(void) +{ + if (sysfs_create_group(mm_kobj, &pgsize_migration_attr_group)) + pr_err("pgsize_migration: failed to create sysfs group\n"); + + return 0; +}; +late_initcall(init_pgsize_migration); +#endif /* CONFIG_64BIT */ From 38cccb91549e0daf566f955e88d6e92f883c484c Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 22:21:32 -0700 Subject: [PATCH 56/98] ANDROID: 16K: Introduce ELF padding representation for VMAs The dynamic linker may extend ELF LOAD segment mappings to be contiguous in memory when loading a 16kB compatible ELF on a 4kB page-size system. This is done to reduce the use of unreclaimable VMA slab memory for the otherwise necessary "gap" VMAs. The extended portion of the mapping (VMA) can be viewed as "padding", meaning that the mapping in that range corresponds to an area of the file that does not contain contents of the respective segments (maybe zero's depending on how the ELF is built). For some compatibility mitigations, the region of a VMA corresponding to these padding sections need to be known. In order to represent such regions without adding addtional overhead or breaking ABI, some upper bits of vm_flags are used. 
Add the VMA padding pages representation and the necessary APIs to manipulate it. Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: Ieb9fa98e30ec9b0bec62256624f14e3ed6062a75 Signed-off-by: Kalesh Singh --- include/linux/pgsize_migration.h | 64 ++++++++++++++++++++++++++++++++ mm/pgsize_migration.c | 22 ++++++++++- 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 include/linux/pgsize_migration.h diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h new file mode 100644 index 000000000000..60f719d44107 --- /dev/null +++ b/include/linux/pgsize_migration.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PAGE_SIZE_MIGRATION_H +#define _LINUX_PAGE_SIZE_MIGRATION_H + +/* + * Page Size Migration + * + * Copyright (c) 2024, Google LLC. + * Author: Kalesh Singh + * + * This file contains the APIs for mitigations to ensure + * app compatibility during the transition from 4kB to 16kB + * page size in Android. + */ + +#include +#include + +/* + * vm_flags representation of VMA padding pages. + * + * This allows the kernel to identify the portion of an ELF LOAD segment VMA + * that is padding. + * + * 4 high bits of vm_flags [63,60] are used to represent ELF segment padding + * up to 60kB, which is sufficient for ELFs of both 16kB and 64kB segment + * alignment (p_align). + * + * The representation is illustrated below. 
+ * + * 63 62 61 60 + * _________ _________ _________ _________ + * | Bit 3 | Bit 2 | Bit 1 | Bit 0 | + * | of 4kB | of 4kB | of 4kB | of 4kB | + * | chunks | chunks | chunks | chunks | + * |_________|_________|_________|_________| + */ + +#define VM_PAD_WIDTH 4 +#define VM_PAD_SHIFT (BITS_PER_LONG - VM_PAD_WIDTH) +#define VM_TOTAL_PAD_PAGES ((1ULL << VM_PAD_WIDTH) - 1) + +#if PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) +extern void vma_set_pad_pages(struct vm_area_struct *vma, + unsigned long nr_pages); + +extern unsigned long vma_pad_pages(struct vm_area_struct *vma); +#else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ +static inline void vma_set_pad_pages(struct vm_area_struct *vma, + unsigned long nr_pages) +{ +} + +static inline unsigned long vma_pad_pages(struct vm_area_struct *vma) +{ + return 0; +} +#endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ + +static inline unsigned long vma_data_pages(struct vm_area_struct *vma) +{ + return vma_pages(vma) - vma_pad_pages(vma); +} +#endif /* _LINUX_PAGE_SIZE_MIGRATION_H */ diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index e840cda99e22..dda4ec802332 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -10,11 +10,12 @@ * Author: Kalesh Singh */ +#include + #include #include #include #include -#include #include #ifdef CONFIG_64BIT @@ -102,4 +103,23 @@ static int __init init_pgsize_migration(void) return 0; }; late_initcall(init_pgsize_migration); + +#if PAGE_SIZE == SZ_4K +void vma_set_pad_pages(struct vm_area_struct *vma, + unsigned long nr_pages) +{ + if (!is_pgsize_migration_enabled()) + return; + + vma->vm_flags |= (nr_pages << VM_PAD_SHIFT); +} + +unsigned long vma_pad_pages(struct vm_area_struct *vma) +{ + if (!is_pgsize_migration_enabled()) + return 0; + + return vma->vm_flags >> VM_PAD_SHIFT; +} +#endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From e7bff50b229b4658f2ab7e3649635da54d9fcdec Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 
22:21:32 -0700 Subject: [PATCH 57/98] ANDROID: 16K: Use MADV_DONTNEED to save VMA padding pages. When performing LOAD segment extension, the dynamic linker knows what portion of the VMA is padding. In order for the kernel to implement mitigations that ensure app compatibility, the extent of the padding must be made available to the kernel. To achieve this, reuse MADV_DONTNEED on single VMAs to hint the padding range to the kernel. This information is then stored in vm_flag bits. This allows userspace (dynamic linker) to set the padding pages on the VMA without a need for new out-of-tree UAPI. Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I3421de32ab38ad3cb0fbce73ecbd8f7314287cde Signed-off-by: Kalesh Singh --- include/linux/pgsize_migration.h | 8 +++++ mm/madvise.c | 3 ++ mm/pgsize_migration.c | 56 ++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index 60f719d44107..fd1e74ea4283 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -45,6 +45,9 @@ extern void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages); extern unsigned long vma_pad_pages(struct vm_area_struct *vma); + +extern void madvise_vma_pad_pages(struct vm_area_struct *vma, + unsigned long start, unsigned long end); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages) @@ -55,6 +58,11 @@ static inline unsigned long vma_pad_pages(struct vm_area_struct *vma) { return 0; } + +static inline void madvise_vma_pad_pages(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} #endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ static inline unsigned long vma_data_pages(struct vm_area_struct *vma) diff --git a/mm/madvise.c b/mm/madvise.c index 703b68381241..b365c15112b6 100644 --- a/mm/madvise.c +++ 
b/mm/madvise.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -788,6 +789,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + madvise_vma_pad_pages(vma, start, end); + zap_page_range(vma, start, end - start); return 0; } diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index dda4ec802332..59d4fbfb96da 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -121,5 +121,61 @@ unsigned long vma_pad_pages(struct vm_area_struct *vma) return vma->vm_flags >> VM_PAD_SHIFT; } + +static __always_inline bool str_has_suffix(const char *str, const char *suffix) +{ + size_t str_len = strlen(str); + size_t suffix_len = strlen(suffix); + + if (str_len < suffix_len) + return false; + + return !strncmp(str + str_len - suffix_len, suffix, suffix_len); +} + +/* + * Saves the number of padding pages for an ELF segment mapping + * in vm_flags. + * + * The number of padding pages is deduced from the madvise DONTNEED range [start, end) + * if the following conditions are met: + * 1) The range is enclosed by a single VMA + * 2) The range ends at the end address of the VMA + * 3) The range starts at an address greater than the start address of the VMA + * 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES. 
+ * 5) The VMA is a regular file backed VMA (filemap_fault) + * 6) The file backing the VMA is a shared library (*.so) + */ +void madvise_vma_pad_pages(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + unsigned long nr_pad_pages; + + if (!is_pgsize_migration_enabled()) + return; + + /* Only handle this for file backed VMAs */ + if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) + return; + + + /* Limit this to only shared libraries (*.so) */ + if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so")) + return; + + /* + * If the madvise range is it at the end of the file save the number of + * pages in vm_flags (only need 4 bits are needed for 16kB aligned ELFs). + */ + if (start <= vma->vm_start || end != vma->vm_end) + return; + + nr_pad_pages = (end - start) >> PAGE_SHIFT; + + if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES) + return; + + vma_set_pad_pages(vma, nr_pad_pages); +} #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From 37ea0e848513390d7ccda788702f19bc23a52e06 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 22:37:48 -0700 Subject: [PATCH 58/98] ANDROID: 16K: Exclude ELF padding for fault around range Userspace apps often analyze memory consumption by the use of mm rss_stat counters -- via the kmem/rss_stat trace event or from /proc//statm. rss_stat counters are only updated when the PTEs are updated. What this means is that pages can be present in the page cache from readahead but not visible to userspace (not attributed to the app) as there is no corresponding VMA (PTEs) for the respective page cache pages. A side effect of the loader now extending ELF LOAD segments to be contiguously mapped in the virtual address space, means that the VMA is extended to cover the padding pages. 
When filesystems, such as f2fs and ext4, that implement vm_ops->map_pages() attempt to perform a do_fault_around() the extent of the fault around is restricted by the area of the enclosing VMA. Since the loader extends LOAD segment VMAs to be contiguously mapped, the extent of the fault around is also increased. The result of which, is that the PTEs corresponding to the padding pages are updated and reflected in the rss_stat counters. It is not common that userspace application developers be aware of this nuance in the kernel's memory accounting. To avoid apparent regressions in memory usage to userspace, restrict the fault around range to only valid data pages (i.e. exclude the padding pages at the end of the VMA). Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I2c7a39ec1b040be2b9fb47801f95042f5dbf869d Signed-off-by: Kalesh Singh --- mm/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 37640dc59e60..4ee6c484e85d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -4461,7 +4462,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) end_pgoff = start_pgoff - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; - end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, + end_pgoff = min3(end_pgoff, vma_data_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && From 084d22016ccabe60338ea69b0a1c4c264551bd4c Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 4 Apr 2024 23:02:30 -0700 Subject: [PATCH 59/98] ANDROID: 16K: Separate padding from ELF LOAD segment mappings In has been found that some in-field apps depend on the output of /proc/*/maps to determine the address ranges of other operations. 
With the extension of LOAD segments VMAs to be contiguous in memory, the apps may perform operations on an area that is not backed by the underlying file, which results in a SIGBUS. Other apps have crashed with yet unidentified reasons. To avoid breaking in-field apps, maintain the output of /proc/*/[s]maps with PROT_NONE VMAs for the padding pages of LOAD segments instead of showing the segment extensions. NOTE: This does not allocate actual backing VMAs for the shown PROT_NONE mappings. This approach maintains 2 possible assumptions that userspace (apps) could be depending on: 1) That LOAD segment mappings are "contiguous" (not separated by unrelated mappings) in memory. 2) That no virtual address space is available between mappings of consecutive LOAD segments for the same ELF. For example the output of /proc/*/[s]maps before and after this change is shown below. Segments maintain PROT_NONE gaps ("[page size compat]") for app compatibility but these are not backed by actual slab VMA memory. 
Maps Before: 7fb03604d000-7fb036051000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so 7fb036051000-7fb036055000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so 7fb036055000-7fb036059000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so 7fb036059000-7fb03605a000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Maps After: 7fc707390000-7fc707393000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so 7fc707393000-7fc707394000 ---p 00000000 00:00 0 [page size compat] 7fc707394000-7fc707398000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so 7fc707398000-7fc707399000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so 7fc707399000-7fc70739c000 ---p 00000000 00:00 0 [page size compat] 7fc70739c000-7fc70739d000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Smaps Before: 7fb03604d000-7fb036051000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 16 kB Pss: 0 kB Pss_Dirty: 0 kB Shared_Clean: 16 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 16 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me 7fb036051000-7fb036055000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 16 kB Pss: 0 kB Pss_Dirty: 0 kB Shared_Clean: 16 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 16 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd ex mr mw me 7fb036055000-7fb036059000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB 
Pss_Dirty: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me ac 7fb036059000-7fb03605a000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB Pss_Dirty: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac Smaps After: 7fc707390000-7fc707393000 r--p 00000000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 12 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 12 kB Pss: 0 kB Shared_Clean: 12 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 12 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me ?? 
7fc707393000-7fc707394000 ---p 00000000 00:00 0 [page size compat] Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 0 kB Pss: 0 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 0 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: mr mw me 7fc707394000-7fc707398000 r-xp 00004000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 16 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 16 kB Pss: 0 kB Shared_Clean: 16 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 16 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd ex mr mw me 7fc707398000-7fc707399000 r--p 00008000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd mr mw me ac ?? ?? 
7fc707399000-7fc70739c000 ---p 00000000 00:00 0 [page size compat] Size: 12 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 0 kB Pss: 0 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 0 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: mr mw me ac 7fc70739c000-7fc70739d000 rw-p 0000c000 fe:09 21935719 /system/lib64/libnetd_client.so Size: 4 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 4 kB Pss: 4 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 4 kB Referenced: 4 kB Anonymous: 4 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I12bf2c106fafc74a500d79155b81dde5db42661e Signed-off-by: Kalesh Singh --- fs/proc/task_mmu.c | 14 ++++- include/linux/pgsize_migration.h | 29 ++++++++++ mm/pgsize_migration.c | 92 ++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 16a537a60c80..095aa7c80ee1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -344,7 +345,13 @@ done: static int show_map(struct seq_file *m, void *v) { - show_map_vma(m, v); + struct vm_area_struct *pad_vma = get_pad_vma(v); + struct vm_area_struct *vma = get_data_vma(v); + + show_map_vma(m, vma); + + show_map_pad_vma(vma, pad_vma, m, show_map_vma); + return 0; } @@ -838,7 +845,8 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, static int show_smap(struct seq_file *m, void *v) { - struct vm_area_struct *vma = v; + struct vm_area_struct *pad_vma = 
get_pad_vma(v); + struct vm_area_struct *vma = get_data_vma(v); struct mem_size_stats mss; memset(&mss, 0, sizeof(mss)); @@ -861,6 +869,8 @@ static int show_smap(struct seq_file *m, void *v) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); + show_map_pad_vma(vma, pad_vma, m, (show_pad_vma_fn)show_smap); + return 0; } diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index fd1e74ea4283..7ab0f288bcf9 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -14,6 +14,7 @@ */ #include +#include #include /* @@ -39,6 +40,10 @@ #define VM_PAD_WIDTH 4 #define VM_PAD_SHIFT (BITS_PER_LONG - VM_PAD_WIDTH) #define VM_TOTAL_PAD_PAGES ((1ULL << VM_PAD_WIDTH) - 1) +#define VM_PAD_MASK (VM_TOTAL_PAD_PAGES << VM_PAD_SHIFT) +#define VMA_PAD_START(vma) (vma->vm_end - (vma_pad_pages(vma) << PAGE_SHIFT)) + +typedef void (*show_pad_vma_fn)(struct seq_file *m, struct vm_area_struct *vma); #if PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) extern void vma_set_pad_pages(struct vm_area_struct *vma, @@ -48,6 +53,14 @@ extern unsigned long vma_pad_pages(struct vm_area_struct *vma); extern void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end); + +extern struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma); + +extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma); + +extern void show_map_pad_vma(struct vm_area_struct *vma, + struct vm_area_struct *pad, + struct seq_file *m, show_pad_vma_fn func); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages) @@ -63,6 +76,22 @@ static inline void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } + +static inline struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct vm_area_struct *get_data_vma(struct 
vm_area_struct *vma) +{ + return vma; +} + +static inline void show_map_pad_vma(struct vm_area_struct *vma, + struct vm_area_struct *pad, + struct seq_file *m, show_pad_vma_fn func) +{ +} #endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ static inline unsigned long vma_data_pages(struct vm_area_struct *vma) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index 59d4fbfb96da..b7264f49a9cb 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #ifdef CONFIG_64BIT @@ -177,5 +178,96 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, vma_set_pad_pages(vma, nr_pad_pages); } + +static const char *pad_vma_name(struct vm_area_struct *vma) +{ + return "[page size compat]"; +} + +static const struct vm_operations_struct pad_vma_ops = { + .name = pad_vma_name, +}; + +/* + * Returns a new VMA representing the padding in @vma, if no padding + * in @vma returns NULL. + */ +struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) +{ + struct vm_area_struct *pad; + + if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK)) + return NULL; + + pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + + *pad = *vma; + + /* Remove file */ + pad->vm_file = NULL; + + /* Add vm_ops->name */ + pad->vm_ops = &pad_vma_ops; + + /* Adjust the start to begin at the start of the padding section */ + pad->vm_start = VMA_PAD_START(pad); + + /* Make the pad vma PROT_NONE */ + pad->vm_flags = pad->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); + + /* Remove padding bits */ + pad->vm_flags = pad->vm_flags & ~VM_PAD_MASK; + + return pad; +} + +/* + * Returns a new VMA exclusing the padding from @vma; if no padding in + * @vma returns @vma. 
+ */ +struct vm_area_struct *get_data_vma(struct vm_area_struct *vma) +{ + struct vm_area_struct *data; + + if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK)) + return vma; + + data = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + + *data = *vma; + + /* Adjust the end to the start of the padding section */ + data->vm_end = VMA_PAD_START(data); + + return data; +} + +/* + * Calls the show_pad_vma_fn on the @pad VMA, and frees the copies of @vma + * and @pad. + */ +void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, + struct seq_file *m, show_pad_vma_fn func) +{ + if (!pad) + return; + + /* + * This cannot happen. If @pad vma was allocated the corresponding + * @vma should have the VM_PAD_MASK bit(s) set. + */ + BUG_ON(!(vma->vm_flags & VM_PAD_MASK)); + + /* + * This cannot happen. @pad is a section of the original VMA. + * Therefore @vma cannot be null if @pad is not null. + */ + BUG_ON(!vma); + + func(m, pad); + + kfree(pad); + kfree(vma); +} #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From 264477e0d844ee272cbf7d1bc277705b2f7232ad Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Fri, 12 Apr 2024 14:32:14 +0000 Subject: [PATCH 60/98] ANDROID: Update the ABI symbol list Adding the following symbols: - iov_iter_kvec - seq_read_iter 1 function symbol(s) added 'ssize_t seq_read_iter(struct kiocb*, struct iov_iter*)' Bug: 332885803 Change-Id: I4068f8a28395deee9a7bcd1cccf786cdd169f0c1 Signed-off-by: Varad Gautam --- android/abi_gki_aarch64.stg | 10 ++++++++++ android/abi_gki_aarch64_pixel | 2 ++ 2 files changed, 12 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index a1c1af173972..97fca3ab154c 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -350351,6 +350351,15 @@ elf_symbol { type_id: 0x12e4741f full_name: "seq_read" } +elf_symbol { + id: 0x8ad22c43 + name: "seq_read_iter" + is_defined: true + symbol_type: FUNCTION + crc: 0x3bf0fbdb + 
type_id: 0x16637235 + full_name: "seq_read_iter" +} elf_symbol { id: 0x91763ae6 name: "seq_release" @@ -368839,6 +368848,7 @@ interface { symbol_id: 0x25bebf3b symbol_id: 0x59b4ca07 symbol_id: 0xba8007cd + symbol_id: 0x8ad22c43 symbol_id: 0x91763ae6 symbol_id: 0x2cc9ecc6 symbol_id: 0x56c495a4 diff --git a/android/abi_gki_aarch64_pixel b/android/abi_gki_aarch64_pixel index beed11452de5..96052847087d 100644 --- a/android/abi_gki_aarch64_pixel +++ b/android/abi_gki_aarch64_pixel @@ -1106,6 +1106,7 @@ io_schedule_timeout iounmap iov_iter_bvec + iov_iter_kvec ip_send_check iput __irq_alloc_descs @@ -1837,6 +1838,7 @@ seq_putc seq_puts seq_read + seq_read_iter seq_release seq_release_private seq_write From 25ebc0917814352a3a25edb0b1c0a1725d4cae55 Mon Sep 17 00:00:00 2001 From: Oven Date: Wed, 10 Apr 2024 19:23:08 +0800 Subject: [PATCH 61/98] ANDROID: mm: fix incorrect unlock mmap_lock for speculative swap fault In a20b68c396127cd6387f37845c5bc05e44e2fd0e, SPF is supported for swap fault. But in __lock_page_or_retry(), it will unlock mmap_lock unconditionally. That will cause unpaired lock release in handling SPF. 
Bug: 333508035 Change-Id: Ia1da66c85e0d58883cf518f10cd33fc5cad387b8 Signed-off-by: Oven (cherry picked from commit 63070883166ae63620a87d958319deba86f236ae) --- mm/filemap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index c659d7bf7a81..7b17a22943aa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1710,7 +1710,8 @@ __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm, if (flags & FAULT_FLAG_RETRY_NOWAIT) return 0; - mmap_read_unlock(mm); + if (!(flags & FAULT_FLAG_SPECULATIVE)) + mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) wait_on_page_locked_killable(page); else @@ -1722,7 +1723,8 @@ __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm, ret = __lock_page_killable(page); if (ret) { - mmap_read_unlock(mm); + if (!(flags & FAULT_FLAG_SPECULATIVE)) + mmap_read_unlock(mm); return 0; } } else { From aa07d6b28d2ce69654e8da4e73fa7868dc7ce604 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 09:43:13 -0700 Subject: [PATCH 62/98] ANDROID: scsi: ufs: Unexport ufshcd_mcq_poll_cqe_nolock() Unexport this function because it is not used outside the UFSHCI core driver and because it is not possible to use this function from outside the UFSHCI core driver without triggering a race condition. 
Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Change-Id: I1bb504b0310c3618db94e9401ff4f7e13633d6a0 Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufs-mcq.c | 1 - include/ufs/ufshcd.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 202ff71e1b58..cf06702e1b8f 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -294,7 +294,6 @@ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, return completed_reqs; } -EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, struct ufs_hw_queue *hwq) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index b381daaac32c..91f11f37b390 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1298,8 +1298,6 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); void ufshcd_schedule_eh_work(struct ufs_hba *hba); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); -unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq); void ufshcd_mcq_enable_esi(struct ufs_hba *hba); void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg); From 0fcd7a1c7ca498527c2a56f0413594a797e451ac Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 15:49:11 -0700 Subject: [PATCH 63/98] BACKPORT: FROMLIST: scsi: ufs: Make ufshcd_poll() complain about unsupported arguments The ufshcd_poll() implementation does not support queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT in MCQ mode. Hence complain if queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT in MCQ mode. 
Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Link: https://lore.kernel.org/linux-scsi/20240416171357.1062583-1-bvanassche@acm.org/T/#mf141ffd0528e062eccaceb98f326abae709da3c1 Change-Id: I4182872aa86ed84f074a3f11364138cfde19e74b Signed-off-by: Bart Van Assche Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufshcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index fdf196d70658..fb9d88e5778a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5540,6 +5540,7 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) struct ufs_hw_queue *hwq; if (is_mcq_enabled(hba)) { + WARN_ON_ONCE(queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT); hwq = &hba->uhq[queue_num + UFSHCD_MCQ_IO_QUEUE_OFFSET]; return ufshcd_mcq_poll_cqe_lock(hba, hwq); From 8563ce58954679187f0c269a504520d8def37e41 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 15:49:25 -0700 Subject: [PATCH 64/98] BACKPORT: FROMLIST: scsi: ufs: Make the polling code report which command has been completed Prepare for introducing a new __ufshcd_poll() caller that will need to know whether or not a specific command has been completed. 
Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Link: https://lore.kernel.org/linux-scsi/20240416171357.1062583-1-bvanassche@acm.org/T/#m68901e4f4e2437e7d0cb747049006ab19f57e038 Change-Id: I1b25b095b4bf9fbf175aa963ec85fcbbcb2be0ed Signed-off-by: Bart Van Assche Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufs-mcq.c | 22 ++++++++++++++------- drivers/ufs/core/ufshcd-priv.h | 10 ++++++---- drivers/ufs/core/ufshcd.c | 36 ++++++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index cf06702e1b8f..693a7da672a0 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -268,23 +268,29 @@ static int ufshcd_mcq_get_tag(struct ufs_hba *hba, return div_u64(addr, sizeof(struct utp_transfer_cmd_desc)); } -static void ufshcd_mcq_process_cqe(struct ufs_hba *hba, - struct ufs_hw_queue *hwq) +/* Returns true if and only if @compl_cmd has been completed. */ +static bool ufshcd_mcq_process_cqe(struct ufs_hba *hba, + struct ufs_hw_queue *hwq, + struct scsi_cmnd *compl_cmd) { struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq); int tag = ufshcd_mcq_get_tag(hba, hwq, cqe); - ufshcd_compl_one_cqe(hba, tag, cqe); + return ufshcd_compl_one_cqe(hba, tag, cqe, compl_cmd); } +/* Clears *@compl_cmd if and only if *@compl_cmd has been completed. */ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq) + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd) { unsigned long completed_reqs = 0; ufshcd_mcq_update_cq_tail_slot(hwq); while (!ufshcd_mcq_is_cq_empty(hwq)) { - ufshcd_mcq_process_cqe(hba, hwq); + if (ufshcd_mcq_process_cqe(hba, hwq, + compl_cmd ? *compl_cmd : NULL)) + *compl_cmd = NULL; ufshcd_mcq_inc_cq_head_slot(hwq); completed_reqs++; } @@ -295,13 +301,15 @@ unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, return completed_reqs; } +/* Clears *@compl_cmd if and only if *@compl_cmd has been completed. 
*/ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq) + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd) { unsigned long completed_reqs, flags; spin_lock_irqsave(&hwq->cq_lock, flags); - completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq); + completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq, compl_cmd); spin_unlock_irqrestore(&hwq->cq_lock, flags); return completed_reqs; diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 76e2d15ff698..8fd8156ce40a 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -56,8 +56,8 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res); void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit); -void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, - struct cq_entry *cqe); +bool ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, + struct cq_entry *cqe, struct scsi_cmnd *compl_cmd); int ufshcd_mcq_init(struct ufs_hba *hba); int ufshcd_mcq_decide_queue_depth(struct ufs_hba *hba); int ufshcd_mcq_memory_alloc(struct ufs_hba *hba); @@ -67,11 +67,13 @@ void ufshcd_mcq_select_mcq_mode(struct ufs_hba *hba); u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq); + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd); struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, struct request *req); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, - struct ufs_hw_queue *hwq); + struct ufs_hw_queue *hwq, + struct scsi_cmnd **compl_cmd); #define UFSHCD_MCQ_IO_QUEUE_OFFSET 1 #define SD_ASCII_STD true diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index fb9d88e5778a..320986753b13 100644 --- 
a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5463,9 +5463,12 @@ static void ufshcd_release_scsi_cmd(struct ufs_hba *hba, * @hba: per adapter instance * @task_tag: the task tag of the request to be completed * @cqe: pointer to the completion queue entry + * @compl_cmd: if not NULL, check whether this command has been completed + * + * Returns: true if and only if @compl_cmd has been completed. */ -void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, - struct cq_entry *cqe) +bool ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, + struct cq_entry *cqe, struct scsi_cmnd *compl_cmd) { struct ufshcd_lrb *lrbp; struct scsi_cmnd *cmd; @@ -5482,6 +5485,7 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_release_scsi_cmd(hba, lrbp); /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); + return cmd == compl_cmd; } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE || lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) { if (hba->dev_cmd.complete) { @@ -5492,20 +5496,26 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_clk_scaling_update_busy(hba); } } + return false; } /** * __ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance * @completed_reqs: bitmask that indicates which requests to complete + * @compl_cmd: if not NULL, check whether *@compl_cmd has been completed. + * Clear *@compl_cmd if it has been completed. */ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, - unsigned long completed_reqs) + unsigned long completed_reqs, + struct scsi_cmnd **compl_cmd) { int tag; for_each_set_bit(tag, &completed_reqs, hba->nutrs) - ufshcd_compl_one_cqe(hba, tag, NULL); + if (ufshcd_compl_one_cqe(hba, tag, NULL, + compl_cmd ? *compl_cmd : NULL)) + *compl_cmd = NULL; } /* Any value that is not an existing queue number is fine for this constant. 
*/ @@ -5532,7 +5542,8 @@ static void ufshcd_clear_polled(struct ufs_hba *hba, * Returns > 0 if one or more commands have been completed or 0 if no * requests have been completed. */ -static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) +static int __ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num, + struct scsi_cmnd **compl_cmd) { struct ufs_hba *hba = shost_priv(shost); unsigned long completed_reqs, flags; @@ -5543,7 +5554,7 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) WARN_ON_ONCE(queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT); hwq = &hba->uhq[queue_num + UFSHCD_MCQ_IO_QUEUE_OFFSET]; - return ufshcd_mcq_poll_cqe_lock(hba, hwq); + return ufshcd_mcq_poll_cqe_lock(hba, hwq, compl_cmd); } spin_lock_irqsave(&hba->outstanding_lock, flags); @@ -5560,11 +5571,16 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (completed_reqs) - __ufshcd_transfer_req_compl(hba, completed_reqs); + __ufshcd_transfer_req_compl(hba, completed_reqs, compl_cmd); return completed_reqs != 0; } +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) +{ + return __ufshcd_poll(shost, queue_num, NULL); +} + /** * ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance @@ -6820,7 +6836,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba) ufshcd_mcq_write_cqis(hba, events, i); if (events & UFSHCD_MCQ_CQIS_TAIL_ENT_PUSH_STS) - ufshcd_mcq_poll_cqe_nolock(hba, hwq); + ufshcd_mcq_poll_cqe_nolock(hba, hwq, NULL); } return IRQ_HANDLED; @@ -7361,7 +7377,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) dev_err(hba->dev, "%s: failed to clear requests %#lx\n", __func__, not_cleared); } - __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared); + __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared, NULL); out: hba->req_abort_count = 0; @@ -7522,7 +7538,7 
@@ static int ufshcd_abort(struct scsi_cmnd *cmd) dev_err(hba->dev, "%s: cmd was completed, but without a notifying intr, tag = %d", __func__, tag); - __ufshcd_transfer_req_compl(hba, 1UL << tag); + __ufshcd_transfer_req_compl(hba, 1UL << tag, NULL); goto release; } From 5725caa2965e7f68b5145272c12ac0bbe99b49e4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Apr 2024 15:49:39 -0700 Subject: [PATCH 65/98] FROMLIST: scsi: ufs: Check for completion from the timeout handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ufshcd_abort() returns SUCCESS for an already completed command then that command is completed twice. This results in a crash. Prevent this by checking whether a command has completed without completion interrupt from the timeout handler. This CL fixes the following kernel crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace:  dma_direct_map_sg+0x70/0x274  scsi_dma_map+0x84/0x124  ufshcd_queuecommand+0x3fc/0x880  scsi_queue_rq+0x7d0/0x111c  blk_mq_dispatch_rq_list+0x440/0xebc  blk_mq_do_dispatch_sched+0x5a4/0x6b8  __blk_mq_sched_dispatch_requests+0x150/0x220  __blk_mq_run_hw_queue+0xf0/0x218  __blk_mq_delay_run_hw_queue+0x8c/0x18c  blk_mq_run_hw_queue+0x1a4/0x360  blk_mq_sched_insert_requests+0x130/0x334  blk_mq_flush_plug_list+0x138/0x234  blk_flush_plug_list+0x118/0x164  blk_finish_plug()  read_pages+0x38c/0x408  page_cache_ra_unbounded+0x230/0x2f8  do_sync_mmap_readahead+0x1a4/0x208  filemap_fault+0x27c/0x8f4  f2fs_filemap_fault+0x28/0xfc  __do_fault+0xc4/0x208  handle_pte_fault+0x290/0xe04  do_handle_mm_fault+0x52c/0x858  do_page_fault+0x5dc/0x798  do_translation_fault+0x40/0x54  do_mem_abort+0x60/0x134  el0_da+0x40/0xb8  el0t_64_sync_handler+0xc4/0xe4  el0t_64_sync+0x1b4/0x1b8 Bug: 312786487 Bug: 326329246 Bug: 333069246 Bug: 333317508 Link: 
https://lore.kernel.org/linux-scsi/20240416171357.1062583-1-bvanassche@acm.org/T/#mbfa6b7a56e07c792ddca7801fb8900f8370d4731 Change-Id: I48e93516d2aae3b2ad62b0b51144e8e2e39d7476 Signed-off-by: Bart Van Assche Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufshcd.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 320986753b13..bdc1b3d34adb 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8781,6 +8781,25 @@ out: static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) { struct ufs_hba *hba = shost_priv(scmd->device->host); + struct scsi_cmnd *cmd2 = scmd; + + WARN_ON_ONCE(!scmd); + + if (is_mcq_enabled(hba)) { + struct request *rq = scsi_cmd_to_rq(scmd); + struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq); + + ufshcd_mcq_poll_cqe_lock(hba, hwq, &cmd2); + } else { + __ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT, + &cmd2); + } + if (cmd2 == NULL) { + sdev_printk(KERN_INFO, scmd->device, + "%s: cmd with tag %#x has already been completed\n", + __func__, blk_mq_unique_tag(scsi_cmd_to_rq(scmd))); + return SCSI_EH_DONE; + } if (!hba->system_suspending) { /* Activate the error handler in the SCSI core. */ From 65e0a92c6d27d4cbaa0deef668df12b69853d65e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: [PATCH 66/98] UPSTREAM: af_unix: Do not use atomic ops for unix_sk(sk)->inflight. [ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ] When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. 
Bug: 336226035 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Signed-off-by: Sasha Levin (cherry picked from commit 301fdbaa0bba4653570f07789909939f977a7620) Signed-off-by: Lee Jones Change-Id: I0d965d5f2a863d798c06de9f21d0467f256b538e --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 17 ++++++++--------- net/unix/scm.c | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7d142e8a0550..01f3aec7128c 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,7 +62,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f079a5bd909d..23bfe5d22a9a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -877,11 +877,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc2763540393..312474c23e9a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - 
atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -237,14 +238,12 @@ void unix_gc(void) */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; - long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); @@ -271,7 +270,7 @@ void unix_gc(void) /* Move cursor to after the current position. 
*/ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, ¬_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index e8e2a00bb0f5..bdcda4e41f10 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -54,12 +54,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -76,10 +77,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); From e8e652b8c81afc06e9bc801f6eb4896516be2d62 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 9 Apr 2024 22:09:39 +0200 Subject: [PATCH 67/98] UPSTREAM: af_unix: Fix garbage collector racing against connect() [ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ] Garbage collector does not take into account the risk of embryo getting enqueued during the garbage collection. If such embryo has a peer that carries SCM_RIGHTS, two consecutive passes of scan_children() may see a different set of children. Leading to an incorrectly elevated inflight count, and then a dangling pointer within the gc_inflight_list. 
sockets are AF_UNIX/SOCK_STREAM S is an unconnected socket L is a listening in-flight socket bound to addr, not in fdtable V's fd will be passed via sendmsg(), gets inflight count bumped connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc() ---------------- ------------------------- ----------- NS = unix_create1() skb1 = sock_wmalloc(NS) L = unix_find_other(addr) unix_state_lock(L) unix_peer(S) = NS // V count=1 inflight=0 NS = unix_peer(S) skb2 = sock_alloc() skb_queue_tail(NS, skb2[V]) // V became in-flight // V count=2 inflight=1 close(V) // V count=1 inflight=1 // GC candidate condition met for u in gc_inflight_list: if (total_refs == inflight_refs) add u to gc_candidates // gc_candidates={L, V} for u in gc_candidates: scan_children(u, dec_inflight) // embryo (skb1) was not // reachable from L yet, so V's // inflight remains unchanged __skb_queue_tail(L, skb1) unix_state_unlock(L) for u in gc_candidates: if (u.inflight) scan_children(u, inc_inflight_move_tail) // V count=1 inflight=2 (!) If there is a GC-candidate listening socket, lock/unlock its state. This makes GC wait until the end of any ongoing connect() to that socket. After flipping the lock, a possibly SCM-laden embryo is already enqueued. And if there is another embryo coming, it can not possibly carry SCM_RIGHTS. At this point, unix_inflight() can not happen because unix_gc_lock is already taken. Inflight graph remains unaffected. 
Bug: 336226035 Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.") Signed-off-by: Michal Luczaj Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 507cc232ffe53a352847893f8177d276c3b532a9) Signed-off-by: Lee Jones Change-Id: If321f78b8b3220f5a1caea4b5e9450f1235b0770 --- net/unix/garbage.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 312474c23e9a..67b2c3bfa113 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -235,11 +235,22 @@ void unix_gc(void) * receive queues. Other, non candidate sockets _can_ be * added to queue, so we must make sure only to touch * candidates. + * + * Embryos, though never candidates themselves, affect which + * candidates are reachable by the garbage collector. Before + * being added to a listener's queue, an embryo may already + * receive data carrying SCM_RIGHTS, potentially making the + * passed socket a candidate that is not yet reachable by the + * collector. It becomes reachable once the embryo is + * enqueued. Therefore, we must ensure that no SCM-laden + * embryo appears in a (candidate) listener's queue between + * consecutive scan_children() calls. 
*/ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + struct sock *sk = &u->sk; long total_refs; - total_refs = file_count(u->sk.sk_socket->file); + total_refs = file_count(sk->sk_socket->file); BUG_ON(!u->inflight); BUG_ON(total_refs < u->inflight); @@ -247,6 +258,11 @@ void unix_gc(void) list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); + + if (sk->sk_state == TCP_LISTEN) { + unix_state_lock(sk); + unix_state_unlock(sk); + } } } From a02278f9908eff05b0d6e3988ab2cee076b0f770 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:26:59 +0100 Subject: [PATCH 68/98] FROMGIT: coresight: etm4x: Do not hardcode IOMEM access for register restore When we restore the register state for ETM4x, while coming back from CPU idle, we hardcode IOMEM access. This is wrong and could blow up for an ETM with system instructions access (and for ETE). Fixes: f5bd523690d2 ("coresight: etm4x: Convert all register accesses") Reported-by: Yabin Cui Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Link: https://lore.kernel.org/r/20240412142702.2882478-2-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit 1e7ba33fa591de1cf60afffcabb45600b3607025 https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: Id2ea066374933de51a90f1fca8304338b741845d Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index e7f32b655dde..b40d56c9203f 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1745,8 +1745,10 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) { int i; struct etmv4_save_state *state = drvdata->save_state; - struct csdev_access 
tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base); - struct csdev_access *csa = &tmp_csa; + struct csdev_access *csa = &drvdata->csdev->access; + + if (WARN_ON(!drvdata->csdev)) + return; etm4_cs_unlock(drvdata, csa); etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET); From 6a08c9fb9d47ecd90d4f2401d9ba0e3f447614f0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:27:00 +0100 Subject: [PATCH 69/98] FROMGIT: coresight: etm4x: Do not save/restore Data trace control registers ETM4x doesn't support Data trace on A class CPUs. As such do not access the Data trace control registers during CPU idle. This could cause problems for ETE. While at it, remove all references to the Data trace control registers. Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Reported-by: Yabin Cui Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Link: https://lore.kernel.org/r/20240412142702.2882478-3-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit 5eb3a0c2c52368cb9902e9a6ea04888e093c487d https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: I06977d86aa2d876d166db0fac8fbccf48fd07229 Signed-off-by: Yabin Cui --- .../coresight/coresight-etm4x-core.c | 6 ---- drivers/hwtracing/coresight/coresight-etm4x.h | 28 ------------------- 2 files changed, 34 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index b40d56c9203f..248c63e0e15b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1641,9 +1641,6 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR); if (drvdata->nr_pe_cmp) state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR); - state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR); - state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR); - 
state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i)); @@ -1774,9 +1771,6 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trcvissctlr, TRCVISSCTLR); if (drvdata->nr_pe_cmp) etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR); - etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR); - etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR); - etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR); for (i = 0; i < drvdata->nrseqstate - 1; i++) etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i)); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 1170fbe04d06..32daf11ec856 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -43,9 +43,6 @@ #define TRCVIIECTLR 0x084 #define TRCVISSCTLR 0x088 #define TRCVIPCSSCTLR 0x08C -#define TRCVDCTLR 0x0A0 -#define TRCVDSACCTLR 0x0A4 -#define TRCVDARCCTLR 0x0A8 /* Derived resources registers */ #define TRCSEQEVRn(n) (0x100 + (n * 4)) /* n = 0-2 */ #define TRCSEQRSTEVR 0x118 @@ -90,9 +87,6 @@ /* Address Comparator registers n = 0-15 */ #define TRCACVRn(n) (0x400 + (n * 8)) #define TRCACATRn(n) (0x480 + (n * 8)) -/* Data Value Comparator Value registers, n = 0-7 */ -#define TRCDVCVRn(n) (0x500 + (n * 16)) -#define TRCDVCMRn(n) (0x580 + (n * 16)) /* ContextID/Virtual ContextID comparators, n = 0-7 */ #define TRCCIDCVRn(n) (0x600 + (n * 8)) #define TRCVMIDCVRn(n) (0x640 + (n * 8)) @@ -174,9 +168,6 @@ /* List of registers accessible via System instructions */ #define ETM4x_ONLY_SYSREG_LIST(op, val) \ CASE_##op((val), TRCPROCSELR) \ - CASE_##op((val), TRCVDCTLR) \ - CASE_##op((val), TRCVDSACCTLR) \ - CASE_##op((val), TRCVDARCCTLR) \ CASE_##op((val), TRCOSLAR) #define ETM_COMMON_SYSREG_LIST(op, val) \ @@ -324,22 +315,6 @@ 
CASE_##op((val), TRCACATRn(13)) \ CASE_##op((val), TRCACATRn(14)) \ CASE_##op((val), TRCACATRn(15)) \ - CASE_##op((val), TRCDVCVRn(0)) \ - CASE_##op((val), TRCDVCVRn(1)) \ - CASE_##op((val), TRCDVCVRn(2)) \ - CASE_##op((val), TRCDVCVRn(3)) \ - CASE_##op((val), TRCDVCVRn(4)) \ - CASE_##op((val), TRCDVCVRn(5)) \ - CASE_##op((val), TRCDVCVRn(6)) \ - CASE_##op((val), TRCDVCVRn(7)) \ - CASE_##op((val), TRCDVCMRn(0)) \ - CASE_##op((val), TRCDVCMRn(1)) \ - CASE_##op((val), TRCDVCMRn(2)) \ - CASE_##op((val), TRCDVCMRn(3)) \ - CASE_##op((val), TRCDVCMRn(4)) \ - CASE_##op((val), TRCDVCMRn(5)) \ - CASE_##op((val), TRCDVCMRn(6)) \ - CASE_##op((val), TRCDVCMRn(7)) \ CASE_##op((val), TRCCIDCVRn(0)) \ CASE_##op((val), TRCCIDCVRn(1)) \ CASE_##op((val), TRCCIDCVRn(2)) \ @@ -821,9 +796,6 @@ struct etmv4_save_state { u32 trcviiectlr; u32 trcvissctlr; u32 trcvipcssctlr; - u32 trcvdctlr; - u32 trcvdsacctlr; - u32 trcvdarcctlr; u32 trcseqevr[ETM_MAX_SEQ_STATES]; u32 trcseqrstevr; From 8ba180228744f31ed67611eae51d86c62a1f7bae Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:27:01 +0100 Subject: [PATCH 70/98] BACKPORT: FROMGIT: coresight: etm4x: Safe access for TRCQCLTR ETM4x implements TRCQCLTR only when the Q elements are supported and the Q element filtering is supported (TRCIDR0.QFILT). Access to the register otherwise could be fatal. Fix this by tracking the availability, like the others. 
Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Reported-by: Yabin Cui Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Link: https://lore.kernel.org/r/20240412142702.2882478-4-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit 46bf8d7cd8530eca607379033b9bc4ac5590a0cd https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: Id848fa14ba8003149f76b5ca54562593f6164150 Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 8 ++++++-- drivers/hwtracing/coresight/coresight-etm4x.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 248c63e0e15b..44c785c296ee 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1113,6 +1113,8 @@ static void etm4_init_arch_data(void *info) drvdata->nr_event = BMVAL(etmidr0, 10, 11); /* QSUPP, bits[16:15] Q element support field */ drvdata->q_support = BMVAL(etmidr0, 15, 16); + if (drvdata->q_support) + drvdata->q_filt = !!(etmidr0 & TRCIDR0_QFILT); /* TSSIZE, bits[28:24] Global timestamp size field */ drvdata->ts_size = BMVAL(etmidr0, 24, 28); @@ -1634,7 +1636,8 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcccctlr = etm4x_read32(csa, TRCCCCTLR); state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR); state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR); - state->trcqctlr = etm4x_read32(csa, TRCQCTLR); + if (drvdata->q_filt) + state->trcqctlr = etm4x_read32(csa, TRCQCTLR); state->trcvictlr = etm4x_read32(csa, TRCVICTLR); state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR); @@ -1764,7 +1767,8 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR); etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR); 
etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR); - etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR); + if (drvdata->q_filt) + etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR); etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR); etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 32daf11ec856..6e8d4f0efab6 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -125,6 +125,8 @@ #define TRCRSR_TA BIT(12) +#define TRCIDR0_QFILT BIT(14) + /* * System instructions to access ETM registers. * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions @@ -867,6 +869,7 @@ struct etmv4_save_state { * @os_unlock: True if access to management registers is allowed. * @instrp0: Tracing of load and store instructions * as P0 elements is supported. + * @q_filt: Q element filtering support, if Q elements are supported. * @trcbb: Indicates if the trace unit supports branch broadcast tracing. * @trccond: If the trace unit supports conditional * instruction tracing. @@ -929,6 +932,7 @@ struct etmv4_drvdata { bool boot_enable; bool os_unlock; bool instrp0; + bool q_filt; bool trcbb; bool trccond; bool retstack; From 0ae4f32634d02104dcd1d7703c1f106f47722049 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 12 Apr 2024 15:27:02 +0100 Subject: [PATCH 71/98] FROMGIT: coresight: etm4x: Fix access to resource selector registers Resource selector pair 0 is always implemented and reserved. We must not touch it, even during save/restore for CPU Idle. Rest of the driver is well behaved. Fix the offending ones. 
Reported-by: Yabin Cui Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Signed-off-by: Suzuki K Poulose Tested-by: Yabin Cui Reviewed-by: Mike Leach Link: https://lore.kernel.org/r/20240412142702.2882478-5-suzuki.poulose@arm.com Bug: 335234033 (cherry picked from commit d6fc00d0f640d6010b51054aa8b0fd191177dbc9 https://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git next) Change-Id: I5f3385cb269969a299402fa258b30ab43e95805f Signed-off-by: Yabin Cui --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 44c785c296ee..2608453620e8 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1660,7 +1660,8 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* Resource selector pair 0 is reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { @@ -1791,7 +1792,8 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* Resource selector pair 0 is reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i)); for (i = 0; i < drvdata->nr_ss_cmp; i++) { From 978f805a2d4b5a0bed9576750152af33f1b4fe45 Mon Sep 17 00:00:00 2001 From: seanwang1 Date: Thu, 25 Apr 2024 17:08:03 +0800 Subject: [PATCH 72/98] ANDROID: GKI: Export css_task_iter_start() Export css_task_iter_start() and css_task_iter_next() and css_task_iter_end() inorder to support task iteration in a cgroup in vendor modules. 
Bug: 336967294 Change-Id: Id93963ddd30ab02c7a4d5086f19d15310e4eda14 Signed-off-by: seanwang1 --- kernel/cgroup/cgroup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 532561757795..3de209d57c19 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4802,6 +4802,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, spin_unlock_irq(&css_set_lock); } +EXPORT_SYMBOL_GPL(css_task_iter_start); /** * css_task_iter_next - return the next task for the iterator @@ -4835,6 +4836,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) return it->cur_task; } +EXPORT_SYMBOL_GPL(css_task_iter_next); /** * css_task_iter_end - finish task iteration @@ -4857,6 +4859,7 @@ void css_task_iter_end(struct css_task_iter *it) if (it->cur_task) put_task_struct(it->cur_task); } +EXPORT_SYMBOL_GPL(css_task_iter_end); static void cgroup_procs_release(struct kernfs_open_file *of) { From 444a497469b1c30429daafdc3e4df7d52dc30ffa Mon Sep 17 00:00:00 2001 From: seanwang1 Date: Sun, 28 Apr 2024 15:18:48 +0800 Subject: [PATCH 73/98] ANDROID: GKI: Update lenovo symbol list 3 function symbols added 'void css_task_iter_end(struct css_task_iter*)' 'struct task_struct* css_task_iter_next(struct css_task_iter*)' 'void css_task_iter_start(struct cgroup_subsys_state*, unsigned int, struct css_task_iter*)' Bug: 336967294 Change-Id: I7258e06fe9f1e21d73481d47a5cc54bb95e40646 Signed-off-by: seanwang1 --- android/abi_gki_aarch64.stg | 138 +++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_lenovo | 3 + 2 files changed, 141 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 97fca3ab154c..5d9f00e5ac55 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -2498,6 +2498,11 @@ pointer_reference { kind: POINTER pointee_type_id: 0x02fa8d0a } +pointer_reference { + id: 0x0a3309af + kind: POINTER + pointee_type_id: 0x028cc020 +} 
pointer_reference { id: 0x0a4d20f4 kind: POINTER @@ -62346,12 +62351,24 @@ member { type_id: 0x4585663f offset: 1376 } +member { + id: 0x6e05f60a + name: "cset_head" + type_id: 0x3e6239e1 + offset: 192 +} member { id: 0xe75624b8 name: "cset_links" type_id: 0xd3c80119 offset: 4032 } +member { + id: 0x66e22acd + name: "cset_pos" + type_id: 0x3e6239e1 + offset: 128 +} member { id: 0xf69ec4bf name: "csets" @@ -63037,6 +63054,18 @@ member { type_id: 0x0d821a01 offset: 384 } +member { + id: 0x07e78da0 + name: "cur_cset" + type_id: 0x0d821a01 + offset: 512 +} +member { + id: 0xace31076 + name: "cur_dcset" + type_id: 0x0d821a01 + offset: 576 +} member { id: 0xc5936017 name: "cur_format" @@ -63137,12 +63166,24 @@ member { type_id: 0x31114896 offset: 320 } +member { + id: 0x47642a41 + name: "cur_task" + type_id: 0x1d19a9d5 + offset: 640 +} member { id: 0x47642b4f name: "cur_task" type_id: 0x1d19a9d5 offset: 448 } +member { + id: 0x2742c6d9 + name: "cur_tasks_head" + type_id: 0x3e6239e1 + offset: 448 +} member { id: 0x8b687155 name: "curchunk_hdrlen" @@ -102535,6 +102576,12 @@ member { type_id: 0x18bd6530 offset: 320 } +member { + id: 0x46869f17 + name: "iters_node" + type_id: 0xd3c80119 + offset: 704 +} member { id: 0xa331f04a name: "itree" @@ -166264,6 +166311,11 @@ member { type_id: 0x0a85fcb6 offset: 704 } +member { + id: 0x8c864b85 + name: "ss" + type_id: 0x0a85fcb6 +} member { id: 0x8c864f64 name: "ss" @@ -173254,6 +173306,12 @@ member { type_id: 0x49b889e7 offset: 896 } +member { + id: 0x83eb8339 + name: "task_pos" + type_id: 0x3e6239e1 + offset: 384 +} member { id: 0x95bea2ba name: "task_running" @@ -173821,6 +173879,18 @@ member { type_id: 0xc9082b19 offset: 32 } +member { + id: 0x0e555c39 + name: "tcset_head" + type_id: 0x3e6239e1 + offset: 320 +} +member { + id: 0x70b82471 + name: "tcset_pos" + type_id: 0x3e6239e1 + offset: 256 +} member { id: 0x3660523a name: "tctx_list" @@ -203585,6 +203655,27 @@ struct_union { member_id: 0x56a72c33 } } +struct_union { + id: 
0x028cc020 + kind: STRUCT + name: "css_task_iter" + definition { + bytesize: 112 + member_id: 0x8c864b85 + member_id: 0x2d2d05d9 + member_id: 0x66e22acd + member_id: 0x6e05f60a + member_id: 0x70b82471 + member_id: 0x0e555c39 + member_id: 0x83eb8339 + member_id: 0x2742c6d9 + member_id: 0x07e78da0 + member_id: 0xace31076 + member_id: 0x47642a41 + member_id: 0x46869f17 + member_id: 0x2d081688 + } +} struct_union { id: 0x209b5a60 kind: STRUCT @@ -264797,6 +264888,11 @@ function { return_type_id: 0x48b5725f parameter_id: 0x0a2e9ae5 } +function { + id: 0x121493f8 + return_type_id: 0x48b5725f + parameter_id: 0x0a3309af +} function { id: 0x12186f16 return_type_id: 0x48b5725f @@ -269831,6 +269927,13 @@ function { parameter_id: 0x2efd5036 parameter_id: 0x18bd6530 } +function { + id: 0x1b4978c4 + return_type_id: 0x48b5725f + parameter_id: 0x2b16c036 + parameter_id: 0x4585663f + parameter_id: 0x0a3309af +} function { id: 0x1b4c69f5 return_type_id: 0x48b5725f @@ -277489,6 +277592,11 @@ function { return_type_id: 0x19341e7e parameter_id: 0x2efe8065 } +function { + id: 0x79c16494 + return_type_id: 0x1d19a9d5 + parameter_id: 0x0a3309af +} function { id: 0x79e2d3b1 return_type_id: 0x18bd6530 @@ -319239,6 +319347,33 @@ elf_symbol { type_id: 0xc867c639 full_name: "css_next_descendant_pre" } +elf_symbol { + id: 0xcf7b4665 + name: "css_task_iter_end" + is_defined: true + symbol_type: FUNCTION + crc: 0x95e443ef + type_id: 0x121493f8 + full_name: "css_task_iter_end" +} +elf_symbol { + id: 0x4223a490 + name: "css_task_iter_next" + is_defined: true + symbol_type: FUNCTION + crc: 0x5929335f + type_id: 0x79c16494 + full_name: "css_task_iter_next" +} +elf_symbol { + id: 0x559ad4b8 + name: "css_task_iter_start" + is_defined: true + symbol_type: FUNCTION + crc: 0x53460a34 + type_id: 0x1b4978c4 + full_name: "css_task_iter_start" +} elf_symbol { id: 0x81bdacc5 name: "csum_ipv6_magic" @@ -365396,6 +365531,9 @@ interface { symbol_id: 0x9b2d60fa symbol_id: 0xd36400de symbol_id: 0x2c08983d + 
symbol_id: 0xcf7b4665 + symbol_id: 0x4223a490 + symbol_id: 0x559ad4b8 symbol_id: 0x81bdacc5 symbol_id: 0x554af0ba symbol_id: 0xbb54c900 diff --git a/android/abi_gki_aarch64_lenovo b/android/abi_gki_aarch64_lenovo index 7a8be12c77d4..1cc068b7f779 100644 --- a/android/abi_gki_aarch64_lenovo +++ b/android/abi_gki_aarch64_lenovo @@ -205,6 +205,9 @@ cpu_topology crc32_le css_next_child + css_task_iter_end + css_task_iter_next + css_task_iter_start csum_partial _ctype debugfs_attr_read From fb310d468a41c61f9dc9c0be165b7e021a5d2ca9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Apr 2024 21:05:13 +0200 Subject: [PATCH 74/98] UPSTREAM: netfilter: nft_set_pipapo: do not free live element [ Upstream commit 3cfc9ec039af60dbd8965ae085b2c2ccdcfbe1cc ] Pablo reports a crash with large batches of elements with a back-to-back add/remove pattern. Quoting Pablo: add_elem("00000000") timeout 100 ms ... add_elem("0000000X") timeout 100 ms del_elem("0000000X") <---------------- delete one that was just added ... add_elem("00005000") timeout 100 ms 1) nft_pipapo_remove() removes element 0000000X Then, KASAN shows a splat. Looking at the remove function there is a chance that we will drop a rule that maps to a non-deactivated element. Removal happens in two steps, first we do a lookup for key k and return the to-be-removed element and mark it as inactive in the next generation. Then, in a second step, the element gets removed from the set/map. The _remove function does not work correctly if we have more than one element that share the same key. This can happen if we insert an element into a set when the set already holds an element with same key, but the element mapping to the existing key has timed out or is not active in the next generation. In such case its possible that removal will unmap the wrong element. If this happens, we will leak the non-deactivated element, it becomes unreachable. 
The element that got deactivated (and will be freed later) will remain reachable in the set data structure, this can result in a crash when such an element is retrieved during lookup (stale pointer). Add a check that the fully matching key does in fact map to the element that we have marked as inactive in the deactivation step. If not, we need to continue searching. Add a bug/warn trap at the end of the function as well, the remove function must not ever be called with an invisible/unreachable/non-existent element. v2: avoid uneeded temporary variable (Stefano) Bug: 336735501 Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Pablo Neira Ayuso Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (cherry picked from commit ebf7c9746f073035ee26209e38c3a1170f7b349a) Signed-off-by: Lee Jones Change-Id: Ic9a48ac9ac0f9960fea9e066d9a0a9fb93f7b633 --- net/netfilter/nft_set_pipapo.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 050672ccfa7e..381c1871be27 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1975,6 +1975,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = rules_f0; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; + if (!pipapo_match_field(f, start, rules_fx, match_start, match_end)) break; @@ -1987,16 +1989,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); - } - if (i == m->field_count) { - priv->dirty = true; - pipapo_drop(m, rulemap); - return; + if (last && f->mt[rulemap[i].to].e == e) { + priv->dirty = true; + pipapo_drop(m, rulemap); + return; + } } first_rule += rules_f0; } + + WARN_ON_ONCE(1); /* elem_priv 
not found */ } /** From 24e6758060b8a5bf7366892080d968962a5420e2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Oct 2022 16:22:53 +0000 Subject: [PATCH 75/98] BACKPORT: rcu: Fix missing nocb gp wake on rcu_barrier() In preparation for RCU lazy changes, wake up the RCU nocb gp thread if needed after an entrain. This change prevents the RCU barrier callback from waiting in the queue for several seconds before the lazy callbacks in front of it are serviced. Reported-by: Joel Fernandes (Google) Signed-off-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit b8f7aca3f0e0e6223094ba2662bac90353674b04 https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git rcu/next) (Backport: Conflicts: kernel/rcu/tree.c Due to missing 'rcu: Rework rcu_barrier() and callback-migration logic' Chose not to backport that.) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: Ib55c5886764b74df22531eca35f076ef7acc08dd Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4062165 Reviewed-by: Vineeth Pillai (cherry picked from commit fc6e55ea65dca9cc52bda6081341f3fcc87f6ee7) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 11 +++++++++++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_nocb.h | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index be627fb32a91..a0989afc9980 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4003,12 +4003,21 @@ static void rcu_barrier_func(void *cpu_in) { uintptr_t cpu = (uintptr_t)cpu_in; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + bool wake_nocb = false; + bool was_alldone = false; rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); rdp->barrier_head.func = rcu_barrier_callback; debug_rcu_head_queue(&rdp->barrier_head); rcu_nocb_lock(rdp); + /* + * Flush bypass and wakeup rcuog if we add callbacks to an empty regular + * queue. This way we don't wait for bypass timer that can reach seconds + * if it's fully lazy. + */ + was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); } else { @@ -4017,6 +4026,8 @@ static void rcu_barrier_func(void *cpu_in) rcu_state.barrier_sequence); } rcu_nocb_unlock(rdp); + if (wake_nocb) + wake_nocb_gp(rdp, false); } /** diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 222a5a59f535..168b1b84b138 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -437,6 +437,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); static void rcu_init_one_nocb(struct rcu_node *rnp); +static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long j); static bool 
rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 8fdf44f8523f..1b74e65399e0 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1449,6 +1449,11 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) { } +static bool wake_nocb_gp(struct rcu_data *rdp, bool force) +{ + return false; +} + static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long j) { From 276d33f21a0ebbe96332dac67a2cd9ba1958346d Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 17 Sep 2022 16:41:59 +0000 Subject: [PATCH 76/98] UPSTREAM: rcu: Fix late wakeup when flush of bypass cblist happens When the bypass cblist gets too big or its timeout has occurred, it is flushed into the main cblist. However, the bypass timer is still running and the behavior is that it would eventually expire and wake the GP thread. Since we are going to use the bypass cblist for lazy CBs, do the wakeup soon as the flush for "too big or too long" bypass list happens. Otherwise, long delays can happen for callbacks which get promoted from lazy to non-lazy. This is a good thing to do anyway (regardless of future lazy patches), since it makes the behavior consistent with behavior of other code paths where flushing into the ->cblist makes the GP kthread into a non-sleeping state quickly. [ Frederic Weisbecker: Changes to avoid unnecessary GP-thread wakeups plus comment changes. ] Reviewed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit b50606f35f4b73c8e4c6b9c64fe7ba72ea919134) Bug: 258241771 Bug: 222463781 Test: powerIdle lab tests. 
Change-Id: If8da96d7ba6ed90a2a70f7d56f7bb03af44fd649 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4065239 Reviewed-by: Vineeth Pillai (cherry picked from commit 75db04e1eed1756a4ee5fb87ef8dd494d19bf53f) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree_nocb.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 1b74e65399e0..869f28eaddcf 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -438,8 +438,9 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || ncbs >= qhimark) { rcu_nocb_lock(rdp); + *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { - *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); @@ -452,7 +453,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, rcu_advance_cbs_nowake(rdp->mynode, rdp); rdp->nocb_gp_adv_time = j; } - rcu_nocb_unlock_irqrestore(rdp, flags); + + // The flush succeeded and we moved CBs into the regular list. + // Don't wait for the wake up timer as it may be too far ahead. + // Wake up the GP thread now instead, if the cblist was empty. + __call_rcu_nocb_wake(rdp, *was_alldone, flags); + return true; // Callback already enqueued. } From e0297c38a54d51304c722405823a5e029ab6a091 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:54 +0000 Subject: [PATCH 77/98] BACKPORT: rcu: Make call_rcu() lazy to save power Implement timer-based RCU callback batching (also known as lazy callbacks). With this we save about 5-10% of power consumed due to RCU requests that happen when system is lightly loaded or idle. 
By default, all async callbacks (queued via call_rcu) are marked lazy. An alternate API call_rcu_hurry() is provided for the few users, for example synchronize_rcu(), that need the old behavior. The batch is flushed whenever a certain amount of time has passed, or the batch on a particular CPU grows too big. Also memory pressure will flush it in a future patch. To handle several corner cases automagically (such as rcu_barrier() and hotplug), we re-use bypass lists which were originally introduced to address lock contention, to handle lazy CBs as well. The bypass list length has the lazy CB length included in it. A separate lazy CB length counter is also introduced to keep track of the number of lazy CBs. [ paulmck: Fix formatting of inline call_rcu_lazy() definition. ] [ paulmck: Apply Zqiang feedback. ] [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] [ joelaf: Small changes for 5.15 backport. ] Suggested-by: Paul McKenney Acked-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Bug: 258241771 Bug: 222463781 Test: CQ (cherry picked from commit 3cb278e73be58bfb780ecd55129296d2f74c1fb7 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master) Change-Id: I557d5af2a5d317bd66e9ec55ed40822bb5c54390 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318045 Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes Tested-by: Joel Fernandes (cherry picked from commit b30e520b9da88a5de115ed5b2c1b2aa89de9e214) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- include/linux/rcupdate.h | 9 +++ kernel/rcu/Kconfig | 8 ++ kernel/rcu/rcu.h | 8 ++ kernel/rcu/tiny.c | 2 +- kernel/rcu/tree.c | 46 +++++++++-- kernel/rcu/tree.h | 11 ++- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_nocb.h | 159 +++++++++++++++++++++++++++++++-------- 8 files changed, 201 insertions(+), 44 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13bddb841ceb..3aad9ebfa7af 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -81,6 +81,15 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_RCU_LAZY +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); +#else +static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} +#endif + /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active __read_mostly; diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 066be3bb9d77..8e31e315a6f5 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -274,4 +274,12 @@ config TASKS_TRACE_RCU_READ_MB Say N here if you hate read-side memory barriers. Take the default if you are unsure. +config RCU_LAZY + bool "RCU callback lazy invocation functionality" + depends on RCU_NOCB_CPU + default n + help + To save power, batch RCU callbacks and flush after delay, memory + pressure, or callback list growing too big. 
+ endmenu # "RCU Subsystem" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 5510d2231c32..075c4e3ebab9 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -459,6 +459,14 @@ enum rcutorture_type { INVALID_RCU_FLAVOR }; +#if defined(CONFIG_RCU_LAZY) +unsigned long rcu_lazy_get_jiffies_till_flush(void); +void rcu_lazy_set_jiffies_till_flush(unsigned long j); +#else +static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } +static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } +#endif + #if defined(CONFIG_TREE_RCU) void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, unsigned long *gp_seq); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 340b3f8b090d..457684ad1627 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = { void rcu_barrier(void) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); } EXPORT_SYMBOL(rcu_barrier); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a0989afc9980..2c7138ac0b60 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2976,9 +2976,8 @@ static void check_cb_ovld(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); } -/* Helper function for call_rcu() and friends. */ static void -__call_rcu(struct rcu_head *head, rcu_callback_t func) +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) { static atomic_t doublefrees; unsigned long flags; @@ -3019,7 +3018,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) } check_cb_ovld(rdp); - if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) return; // Enqueued onto ->nocb_bypass, so just leave. // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. 
rcu_segcblist_enqueue(&rdp->cblist, head); @@ -3042,8 +3041,40 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) } } +#ifdef CONFIG_RCU_LAZY +/** + * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and + * flush all lazy callbacks (including the new one) to the main ->cblist while + * doing so. + * + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. + * + * Use this API instead of call_rcu() if you don't want the callback to be + * invoked after very long periods of time, which can happen on systems without + * memory pressure and on systems which are lightly loaded or mostly idle. + * This function will cause callbacks to be invoked sooner than later at the + * expense of extra power. Other than that, this function is identical to, and + * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory + * ordering and other functionality. + */ +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + return __call_rcu_common(head, func, false); +} +EXPORT_SYMBOL_GPL(call_rcu_hurry); +#endif + /** * call_rcu() - Queue an RCU callback for invocation after a grace period. + * By default the callbacks are 'lazy' and are kept hidden from the main + * ->cblist to prevent starting of grace periods too soon. + * If you desire grace periods to start very soon, use call_rcu_hurry(). + * * @head: structure to be used for queueing the RCU updates. 
* @func: actual callback function to be invoked after the grace period * @@ -3084,11 +3115,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - __call_rcu(head, func); + return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); } EXPORT_SYMBOL_GPL(call_rcu); - /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (HZ / 50) #define KFREE_N_BATCHES 2 @@ -3797,7 +3827,7 @@ void synchronize_rcu(void) if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); else - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); } EXPORT_SYMBOL_GPL(synchronize_rcu); @@ -4016,7 +4046,7 @@ static void rcu_barrier_func(void *cpu_in) * if it's fully lazy. */ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); @@ -4410,7 +4440,7 @@ void rcutree_migrate_callbacks(int cpu) my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false)); raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ /* Leverage recent GPs and set GP for new callbacks. */ needwake = rcu_advance_cbs(my_rnp, rdp) || diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 168b1b84b138..19809d07d92a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -258,14 +258,16 @@ struct rcu_data { short rcu_onl_gp_flags; /* ->gp_flags at last online. */ unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ + long lazy_len; /* Length of buffered lazy callbacks. 
*/ int cpu; }; /* Values for nocb_defer_wakeup field in struct rcu_data. */ #define RCU_NOCB_WAKE_NOT 0 #define RCU_NOCB_WAKE_BYPASS 1 -#define RCU_NOCB_WAKE 2 -#define RCU_NOCB_WAKE_FORCE 3 +#define RCU_NOCB_WAKE_LAZY 2 +#define RCU_NOCB_WAKE 3 +#define RCU_NOCB_WAKE_FORCE 4 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ @@ -439,9 +441,10 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); static void rcu_init_one_nocb(struct rcu_node *rnp); static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j); + unsigned long j, bool lazy); static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags); + bool *was_alldone, unsigned long flags, + bool lazy); static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, unsigned long flags); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f36b812b595b..444f3b47f0b0 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -922,7 +922,7 @@ void synchronize_rcu_expedited(void) /* If expedited grace periods are prohibited, fall back to normal. */ if (rcu_gp_is_normal()) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); return; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 869f28eaddcf..2c30f32df7d1 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -261,6 +261,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) return __wake_nocb_gp(rdp_gp, rdp, force, flags); } +/* + * LAZY_FLUSH_JIFFIES decides the maximum amount of time that + * can elapse before lazy callbacks are flushed. 
Lazy callbacks + * could be flushed much earlier for a number of other reasons + * however, LAZY_FLUSH_JIFFIES will ensure no lazy callbacks are + * left unsubmitted to RCU after those many jiffies. + */ +#define LAZY_FLUSH_JIFFIES (10 * HZ) +static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; + +#ifdef CONFIG_RCU_LAZY +// To be called only from test code. +void rcu_lazy_set_jiffies_till_flush(unsigned long jif) +{ + jiffies_till_flush = jif; +} +EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush); + +unsigned long rcu_lazy_get_jiffies_till_flush(void) +{ + return jiffies_till_flush; +} +EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush); +#endif + /* * Arrange to wake the GP kthread for this NOCB group at some future * time when it is safe to do so. @@ -274,10 +299,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); /* - * Bypass wakeup overrides previous deferments. In case - * of callback storm, no need to wake up too early. + * Bypass wakeup overrides previous deferments. In case of + * callback storms, no need to wake up too early. */ - if (waketype == RCU_NOCB_WAKE_BYPASS) { + if (waketype == RCU_NOCB_WAKE_LAZY && + rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { + mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush); + WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); + } else if (waketype == RCU_NOCB_WAKE_BYPASS) { mod_timer(&rdp_gp->nocb_timer, jiffies + 2); WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); } else { @@ -298,10 +327,13 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, * proves to be initially empty, just return false because the no-CB GP * kthread may need to be awakened in this case. * + * Return true if there was something to be flushed and it succeeded, otherwise + * false. + * * Note that this function always returns true if rhp is NULL. 
*/ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { struct rcu_cblist rcl; @@ -315,7 +347,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ if (rhp) rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + + /* + * If the new CB requested was a lazy one, queue it onto the main + * ->cblist so we can take advantage of a sooner grade period. + */ + if (lazy && rhp) { + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL); + rcu_cblist_enqueue(&rcl, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + } else { + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + } + rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); WRITE_ONCE(rdp->nocb_bypass_first, j); rcu_nocb_bypass_unlock(rdp); @@ -331,13 +376,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, * Note that this function always returns true if rhp is NULL. */ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { if (!rcu_rdp_is_offloaded(rdp)) return true; rcu_lockdep_assert_cblist_protected(rdp); rcu_nocb_bypass_lock(rdp); - return rcu_nocb_do_flush_bypass(rdp, rhp, j); + return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy); } /* @@ -350,7 +395,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) if (!rcu_rdp_is_offloaded(rdp) || !rcu_nocb_bypass_trylock(rdp)) return; - WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j)); + WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } /* @@ -372,12 +417,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) * there is only one CPU in operation. 
*/ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, + bool lazy) { unsigned long c; unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); lockdep_assert_irqs_disabled(); @@ -422,25 +469,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If there hasn't yet been all that many ->cblist enqueues // this jiffy, tell the caller to enqueue onto ->cblist. But flush // ->nocb_bypass first. - if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) { + // Lazy CBs throttle this back and do immediate bypass queuing. + if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j)); + + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); return false; // Caller must enqueue the callback. } // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. 
- if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || + if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || + (ncbs && bypass_is_lazy && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || ncbs >= qhimark) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); - if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { + if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); @@ -468,13 +519,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + + if (lazy) + WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); + if (!ncbs) { WRITE_ONCE(rdp->nocb_bypass_first, j); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ")); } rcu_nocb_bypass_unlock(rdp); smp_mb(); /* Order enqueue before wake. */ - if (ncbs) { + // A wake up of the grace period kthread or timer adjustment + // needs to be done only if: + // 1. Bypass list was fully empty before (this is the first + // bypass list entry), or: + // 2. Both of these conditions are met: + // a. The bypass list previously had only lazy CBs, and: + // b. The new CB is non-lazy. + if (ncbs && (!bypass_is_lazy || lazy)) { local_irq_restore(flags); } else { // No-CBs GP kthread might be indefinitely asleep, if so, wake. @@ -502,8 +564,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, unsigned long flags) __releases(rdp->nocb_lock) { + long bypass_len; unsigned long cur_gp_seq; unsigned long j; + long lazy_len; long len; struct task_struct *t; @@ -517,9 +581,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, } // Need to actually to a wakeup. 
len = rcu_segcblist_n_cbs(&rdp->cblist); + bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_len = READ_ONCE(rdp->lazy_len); if (was_alldone) { rdp->qlen_last_fqs_check = len; - if (!irqs_disabled_flags(flags)) { + // Only lazy CBs in bypass list + if (lazy_len && bypass_len == lazy_len) { + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazy")); + } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ rcu_nocb_unlock_irqrestore(rdp, flags); wake_nocb_gp(rdp, false); @@ -612,12 +683,12 @@ static inline bool nocb_gp_update_state_deoffloading(struct rcu_data *rdp, static void nocb_gp_wait(struct rcu_data *my_rdp) { bool bypass = false; - long bypass_ncbs; int __maybe_unused cpu = my_rdp->cpu; unsigned long cur_gp_seq; unsigned long flags; bool gotcbs = false; unsigned long j = jiffies; + bool lazy = false; bool needwait_gp = false; // This prevents actual uninitialized use. bool needwake; bool needwake_gp; @@ -634,9 +705,13 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp); for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) { bool needwake_state = false; + long bypass_ncbs; + bool flush_bypass = false; + long lazy_ncbs; if (!nocb_gp_enabled_cb(rdp)) continue; + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); rcu_nocb_lock_irqsave(rdp, flags); if (nocb_gp_update_state_deoffloading(rdp, &needwake_state)) { @@ -646,22 +721,37 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) continue; } bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); - if (bypass_ncbs && + lazy_ncbs = READ_ONCE(rdp->lazy_len); + + if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || + bypass_ncbs > 2 * qhimark)) { + flush_bypass = true; + } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || bypass_ncbs > 2 * qhimark)) { - // Bypass 
full or old, so flush it. - (void)rcu_nocb_try_flush_bypass(rdp, j); - bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + flush_bypass = true; } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); if (needwake_state) swake_up_one(&rdp->nocb_state_wq); continue; /* No callbacks here, try next. */ } + + if (flush_bypass) { + // Bypass full or old, so flush it. + (void)rcu_nocb_try_flush_bypass(rdp, j); + bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_ncbs = READ_ONCE(rdp->lazy_len); + } + if (bypass_ncbs) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, - TPS("Bypass")); - bypass = true; + bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass")); + if (bypass_ncbs == lazy_ncbs) + lazy = true; + else + bypass = true; } rnp = rdp->mynode; @@ -711,12 +801,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) my_rdp->nocb_gp_gp = needwait_gp; my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; - if (bypass && !rcu_nocb_poll) { - // At least one child with non-empty ->nocb_bypass, so set - // timer in order to avoid stranding its callbacks. - wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, - TPS("WakeBypassIsDeferred")); + // At least one child with non-empty ->nocb_bypass, so set + // timer in order to avoid stranding its callbacks. + if (!rcu_nocb_poll) { + // If bypass list only has lazy CBs. Add a deferred lazy wake up. + if (lazy && !bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazyIsDeferred")); + // Otherwise add a deferred bypass wake up. + } else if (bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, + TPS("WakeBypassIsDeferred")); + } } + if (rcu_nocb_poll) { /* Polling, so trace if first poll in the series. */ if (gotcbs) @@ -995,7 +1093,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) * return false, which means that future calls to rcu_nocb_try_bypass() * will refuse to put anything into the bypass. 
*/ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); ret = rdp_offload_toggle(rdp, false, flags); swait_event_exclusive(rdp->nocb_state_wq, !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB | @@ -1177,6 +1275,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) raw_spin_lock_init(&rdp->nocb_gp_lock); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); rcu_cblist_init(&rdp->nocb_bypass); + WRITE_ONCE(rdp->lazy_len, 0); } /* @@ -1461,13 +1560,13 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) } static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { return true; } static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, bool lazy) { return false; } From f4abe7bb5f0077047cf2e650a841c837c3bfe7b6 Mon Sep 17 00:00:00 2001 From: Vineeth Pillai Date: Sun, 16 Oct 2022 16:22:56 +0000 Subject: [PATCH 78/98] BACKPORT: rcu: Shrinker for lazy rcu The shrinker is used to speed up the free'ing of memory potentially held by RCU lazy callbacks. RCU kernel module test cases show this to be effective. Test is introduced in a later patch. [Joel: register_shrinker() argument list change.] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I6a73a9dae79ff35feca37abe2663e55a0f46dda8 Signed-off-by: Vineeth Pillai Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit c945b4da7a448a9a56becc5a8745d942b2b83d3c) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318046 Tested-by: Joel Fernandes Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes (cherry picked from commit 2cf50ca2e7c3bc08f5182fc517a89a65e8dca7e3) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree_nocb.h | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 2c30f32df7d1..a4daed924191 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1215,6 +1215,55 @@ int rcu_nocb_cpu_offload(int cpu) } EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); +static unsigned long +lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int cpu; + unsigned long count = 0; + + /* Snapshot count of all CPUs */ + for_each_possible_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + count += READ_ONCE(rdp->lazy_len); + } + + return count ? count : SHRINK_EMPTY; +} + +static unsigned long +lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + int cpu; + unsigned long flags; + unsigned long count = 0; + + /* Snapshot count of all CPUs */ + for_each_possible_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + int _count = READ_ONCE(rdp->lazy_len); + + if (_count == 0) + continue; + rcu_nocb_lock_irqsave(rdp, flags); + WRITE_ONCE(rdp->lazy_len, 0); + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp(rdp, false); + sc->nr_to_scan -= _count; + count += _count; + if (sc->nr_to_scan <= 0) + break; + } + return count ? 
count : SHRINK_STOP; +} + +static struct shrinker lazy_rcu_shrinker = { + .count_objects = lazy_rcu_shrink_count, + .scan_objects = lazy_rcu_shrink_scan, + .batch = 0, + .seeks = DEFAULT_SEEKS, +}; + void __init rcu_init_nohz(void) { int cpu; @@ -1240,6 +1289,9 @@ void __init rcu_init_nohz(void) cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); #endif /* #if defined(CONFIG_NO_HZ_FULL) */ + if (register_shrinker(&lazy_rcu_shrinker)) + pr_err("Failed to register lazy_rcu shrinker!\n"); + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n"); cpumask_and(rcu_nocb_mask, cpu_possible_mask, From 222a4cd66cd4483e3d28f0efe1284b6942b02e59 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:55 +0000 Subject: [PATCH 79/98] UPSTREAM: rcu: Refactor code a bit in rcu_nocb_do_flush_bypass() This consolidates the code a bit and makes it cleaner. Functionally it is the same. Bug: 258241771 Bug: 222463781 Test: CQ Reported-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney (cherry picked from commit 3d222a0c0cfef85bad2c9cff5d541836cb81cfbd) Change-Id: I8422c7138edd6a476fc46374beefdf46dd76b8b0 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318047 Tested-by: Joel Fernandes Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes (cherry picked from commit 58cb433d445d2416ba26645e8df63d86afa15f8c) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree_nocb.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index a4daed924191..b210af04c08a 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -332,10 +332,11 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, * * Note that this function always returns true if rhp is NULL. */ -static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, +static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in, unsigned long j, bool lazy) { struct rcu_cblist rcl; + struct rcu_head *rhp = rhp_in; WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); rcu_lockdep_assert_cblist_protected(rdp); @@ -350,16 +351,16 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, /* * If the new CB requested was a lazy one, queue it onto the main - * ->cblist so we can take advantage of a sooner grade period. + * ->cblist so that we can take advantage of the grace-period that will + * happen regardless. But queue it onto the bypass list first so that + * the lazy CB is ordered with the existing CBs in the bypass list. 
*/ if (lazy && rhp) { - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL); - rcu_cblist_enqueue(&rcl, rhp); - WRITE_ONCE(rdp->lazy_len, 0); - } else { - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); - WRITE_ONCE(rdp->lazy_len, 0); + rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + rhp = NULL; } + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + WRITE_ONCE(rdp->lazy_len, 0); rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); WRITE_ONCE(rdp->nocb_bypass_first, j); From a4cc1aa22dd3c7146a2f67f089b0ee76ebbb9696 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:59 +0000 Subject: [PATCH 80/98] UPSTREAM: rcu/sync: Use call_rcu_hurry() instead of call_rcu call_rcu() changes to save power will slow down rcu sync. Use the call_rcu_hurry() API instead which reverts to the old behavior. [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I5123ba52f47676305dbcfa1233bf3b41f140766c Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit 7651d6b25086656eacfdd8356bfe3a21c0c2d79d) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318048 Reviewed-by: Sean Paul Commit-Queue: Joel Fernandes Reviewed-by: Vineeth Pillai Tested-by: Joel Fernandes (cherry picked from commit 183fce4e1bfbbae1266ec90c6bb871b51d7af81c) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 33d896d85902..c6bae4050d53 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,7 +44,7 @@ static void rcu_sync_func(struct rcu_head *rhp); static void rcu_sync_call(struct rcu_sync *rsp) { - call_rcu(&rsp->cb_head, rcu_sync_func); + call_rcu_hurry(&rsp->cb_head, rcu_sync_func); } /** From ff22b562f0a6573bfb42acaacf777d68a76bde79 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:58 +0000 Subject: [PATCH 81/98] UPSTREAM: percpu-refcount: Use call_rcu_hurry() for atomic switch Earlier commits in this series allow battery-powered systems to build their kernels with the default-disabled CONFIG_RCU_LAZY=y Kconfig option. This Kconfig option causes call_rcu() to delay its callbacks in order to batch callbacks. This means that a given RCU grace period covers more callbacks, thus reducing the number of grace periods, in turn reducing the amount of energy consumed, which increases battery lifetime which can be a very good thing. This is not a subtle effect: In some important use cases, the battery lifetime is increased by more than 10%. This CONFIG_RCU_LAZY=y option is available only for CPUs that offload callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y. Delaying callbacks is normally not a problem because most callbacks do nothing but free memory. If the system is short on memory, a shrinker will kick all currently queued lazy callbacks out of their laziness, thus freeing their memory in short order. Similarly, the rcu_barrier() function, which blocks until all currently queued callbacks are invoked, will also kick lazy callbacks, thus enabling rcu_barrier() to complete in a timely manner. 
However, there are some cases where laziness is not a good option. For example, synchronize_rcu() invokes call_rcu(), and blocks until the newly queued callback is invoked. It would not be a good for synchronize_rcu() to block for ten seconds, even on an idle system. Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a given CPU kicks any lazy callbacks that might be already queued on that CPU. After all, if there is going to be a grace period, all callbacks might as well get full benefit from it. Yes, this could be done the other way around by creating a call_rcu_lazy(), but earlier experience with this approach and feedback at the 2022 Linux Plumbers Conference shifted the approach to call_rcu() being lazy with call_rcu_hurry() for the few places where laziness is inappropriate. And another call_rcu() instance that cannot be lazy is the one on the percpu refcounter's "per-CPU to atomic switch" code path, which uses RCU when switching to atomic mode. The enqueued callback wakes up waiters waiting in the percpu_ref_switch_waitq. Allowing this callback to be lazy would result in unacceptable slowdowns for users of per-CPU refcounts, such as blk_pre_runtime_suspend(). Therefore, make __percpu_ref_switch_to_atomic() use call_rcu_hurry() in order to revert to the old behavior. [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: Icc325f69d0df1a37b6f1de02a284e1fabf20e366 Signed-off-by: Joel Fernandes (Google) Acked-by: Tejun Heo Signed-off-by: Paul E. 
McKenney Cc: Dennis Zhou Cc: Christoph Lameter Cc: (cherry picked from commit 343a72e5e37d380b70534fae3acd7e5e39adb769) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318049 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul Tested-by: Joel Fernandes Commit-Queue: Joel Fernandes (cherry picked from commit dfd536f499642cd18679cc64c79a8fb275137f45) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- lib/percpu-refcount.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index e5c5315da274..668f6aa6a75d 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu_hurry(&ref->data->rcu, + percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) From f12c162eac7c951a5548439fda98aeac53f1bf33 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 18 Nov 2022 19:19:08 +0000 Subject: [PATCH 82/98] UPSTREAM: net: Use call_rcu_hurry() for dst_release() In a networking test on ChromeOS, kernels built with the new CONFIG_RCU_LAZY=y Kconfig option fail a networking test in the teardown phase. This failure may be reproduced as follows: ip netns del The CONFIG_RCU_LAZY=y Kconfig option was introduced by earlier commits in this series for the benefit of certain battery-powered systems. This Kconfig option causes call_rcu() to delay its callbacks in order to batch them. This means that a given RCU grace period covers more callbacks, thus reducing the number of grace periods, in turn reducing the amount of energy consumed, which increases battery lifetime which can be a very good thing. 
This is not a subtle effect: In some important use cases, the battery lifetime is increased by more than 10%. This CONFIG_RCU_LAZY=y option is available only for CPUs that offload callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y. Delaying callbacks is normally not a problem because most callbacks do nothing but free memory. If the system is short on memory, a shrinker will kick all currently queued lazy callbacks out of their laziness, thus freeing their memory in short order. Similarly, the rcu_barrier() function, which blocks until all currently queued callbacks are invoked, will also kick lazy callbacks, thus enabling rcu_barrier() to complete in a timely manner. However, there are some cases where laziness is not a good option. For example, synchronize_rcu() invokes call_rcu(), and blocks until the newly queued callback is invoked. It would not be a good for synchronize_rcu() to block for ten seconds, even on an idle system. Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a given CPU kicks any lazy callbacks that might be already queued on that CPU. After all, if there is going to be a grace period, all callbacks might as well get full benefit from it. Yes, this could be done the other way around by creating a call_rcu_lazy(), but earlier experience with this approach and feedback at the 2022 Linux Plumbers Conference shifted the approach to call_rcu() being lazy with call_rcu_hurry() for the few places where laziness is inappropriate. Returning to the test failure, use of ftrace showed that this failure cause caused by the aadded delays due to this new lazy behavior of call_rcu() in kernels built with CONFIG_RCU_LAZY=y. Therefore, make dst_release() use call_rcu_hurry() in order to revert to the old test-failure-free behavior. 
[ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: Ifd64083bd210a9dfe94c179152f27d310c179507 Signed-off-by: Joel Fernandes (Google) Cc: David Ahern Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Reviewed-by: Eric Dumazet Signed-off-by: Paul E. McKenney (cherry picked from commit 483c26ff63f42e8898ed43aca0b9953bc91f0cd4) Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318050 Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai (cherry picked from commit e0886387489fed8a60e7e0f107b95fb9c0241930) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index 497ef9b3fc6a..a64acdb69f56 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); if (!newrefcnt) - call_rcu(&dst->rcu_head, dst_destroy_rcu); + call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); From 856859371956b4d39ad7affe286f516d8253e736 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 12 Jan 2023 00:52:22 +0000 Subject: [PATCH 83/98] UPSTREAM: rcu: Track laziness during boot and suspend Boot and suspend/resume should not be slowed down in kernels built with CONFIG_RCU_LAZY=y. In particular, suspend can sometimes fail in such kernels. This commit therefore adds rcu_async_hurry(), rcu_async_relax(), and rcu_async_should_hurry() functions that track whether or not either a boot or a suspend/resume operation is in progress. This will enable a later commit to refrain from laziness during those times. 
Export rcu_async_should_hurry(), rcu_async_hurry(), and rcu_async_relax() for later use by rcutorture. [ paulmck: Apply feedback from Steve Rostedt. ] Bug: 258241771 Bug: 222463781 Test: CQ Fixes: 3cb278e73be5 ("rcu: Make call_rcu() lazy to save power") Change-Id: Ieb2f2d484a33cfbd71f71c8e3dbcfc05cd7efe8c Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney (cherry picked from commit 6efdda8bec2900ce5166ee4ff4b1844b47b529cd) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318051 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul Tested-by: Joel Fernandes Commit-Queue: Joel Fernandes (cherry picked from commit 8bc7efc64c84da753f2174a7071c8f1a7823d2bb) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/rcu.h | 6 ++++++ kernel/rcu/tree.c | 2 ++ kernel/rcu/update.c | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 075c4e3ebab9..fd19addb1d27 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -428,14 +428,20 @@ do { \ /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ static inline bool rcu_gp_is_normal(void) { return true; } static inline bool rcu_gp_is_expedited(void) { return false; } +static inline bool rcu_async_should_hurry(void) { return false; } static inline void rcu_expedite_gp(void) { } static inline void rcu_unexpedite_gp(void) { } +static inline void rcu_async_hurry(void) { } +static inline void rcu_async_relax(void) { } static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } #else /* #ifdef CONFIG_TINY_RCU */ bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ +bool rcu_async_should_hurry(void); /* Internal RCU use. 
*/ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); +void rcu_async_hurry(void); +void rcu_async_relax(void); void rcupdate_announce_bootup_oddness(void); #ifdef CONFIG_TASKS_RCU_GENERIC void show_rcu_tasks_gp_kthreads(void); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2c7138ac0b60..f033bffd47e7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4478,11 +4478,13 @@ static int rcu_pm_notify(struct notifier_block *self, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + rcu_async_hurry(); rcu_expedite_gp(); break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: rcu_unexpedite_gp(); + rcu_async_relax(); break; default: break; diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c21b38cc25e9..a90458c7b4f2 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -144,8 +144,45 @@ bool rcu_gp_is_normal(void) } EXPORT_SYMBOL_GPL(rcu_gp_is_normal); -static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); +static atomic_t rcu_async_hurry_nesting = ATOMIC_INIT(1); +/* + * Should call_rcu() callbacks be processed with urgency or are + * they OK being executed with arbitrary delays? + */ +bool rcu_async_should_hurry(void) +{ + return !IS_ENABLED(CONFIG_RCU_LAZY) || + atomic_read(&rcu_async_hurry_nesting); +} +EXPORT_SYMBOL_GPL(rcu_async_should_hurry); +/** + * rcu_async_hurry - Make future async RCU callbacks not lazy. + * + * After a call to this function, future calls to call_rcu() + * will be processed in a timely fashion. + */ +void rcu_async_hurry(void) +{ + if (IS_ENABLED(CONFIG_RCU_LAZY)) + atomic_inc(&rcu_async_hurry_nesting); +} +EXPORT_SYMBOL_GPL(rcu_async_hurry); + +/** + * rcu_async_relax - Make future async RCU callbacks lazy. + * + * After a call to this function, future calls to call_rcu() + * will be processed in a lazy fashion. 
+ */ +void rcu_async_relax(void) +{ + if (IS_ENABLED(CONFIG_RCU_LAZY)) + atomic_dec(&rcu_async_hurry_nesting); +} +EXPORT_SYMBOL_GPL(rcu_async_relax); + +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); /* * Should normal grace-period primitives be expedited? Intended for * use within RCU. Note that this function takes the rcu_expedited @@ -195,6 +232,7 @@ static bool rcu_boot_ended __read_mostly; void rcu_end_inkernel_boot(void) { rcu_unexpedite_gp(); + rcu_async_relax(); if (rcu_normal_after_boot) WRITE_ONCE(rcu_normal, 1); rcu_boot_ended = true; From 706e751b3331d85ba38dbe5654961fa0eb155715 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 12 Jan 2023 00:52:23 +0000 Subject: [PATCH 84/98] UPSTREAM: rcu: Disable laziness if lazy-tracking says so During suspend, we see failures to suspend 1 in 300-500 suspends. Looking closer, it appears that asynchronous RCU callbacks are being queued as lazy even though synchronous callbacks are expedited. These delays appear to not be very welcome by the suspend/resume code as evidenced by these occasional suspend failures. This commit modifies call_rcu() to check if rcu_async_should_hurry(), which will return true if we are in suspend or in-kernel boot. [ paulmck: Alphabetize local variables. ] Ignoring the lazy hint makes the 3000 suspend/resume cycles pass reliably on a 12th gen 12-core Intel CPU, and there is some evidence that it also slightly speeds up boot performance. Bug: 258241771 Bug: 222463781 Test: CQ Fixes: 3cb278e73be5 ("rcu: Make call_rcu() lazy to save power") Change-Id: I4cfe6f43de8bae9a6c034831c79d9773199d6d29 Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney (cherry picked from commit cf7066b97e27b2319af1ae2ef6889c4a1704312d) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318052 Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai Tested-by: Joel Fernandes Commit-Queue: Joel Fernandes (cherry picked from commit e59686da91b689d3771a09f3eae37db5f40d3f75) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f033bffd47e7..1f7f05aa11a5 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2977,10 +2977,11 @@ static void check_cb_ovld(struct rcu_data *rdp) } static void -__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) { static atomic_t doublefrees; unsigned long flags; + bool lazy; struct rcu_data *rdp; bool was_alldone; @@ -3005,6 +3006,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) local_irq_save(flags); kasan_record_aux_stack_noalloc(head); rdp = this_cpu_ptr(&rcu_data); + lazy = lazy_in && !rcu_async_should_hurry(); /* Add the callback to our list. */ if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) { From 930bdc0924849b9e0d7f660e440dff182ab04259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Nov 2022 19:19:09 +0000 Subject: [PATCH 85/98] UPSTREAM: net: devinet: Reduce refcount before grace period Currently, the inetdev_destroy() function waits for an RCU grace period before decrementing the refcount and freeing memory. This causes a delay with a new RCU configuration that tries to save power, which results in the network interface disappearing later than expected. The resulting delay causes test failures on ChromeOS. Refactor the code such that the refcount is freed before the grace period and memory is freed after. 
With this a ChromeOS network test passes that does 'ip netns del' and polls for an interface disappearing, now passes. Bug: 258241771 Bug: 222463781 Test: CQ Reported-by: Joel Fernandes (Google) Change-Id: I98b13c5a8fb9696c1111219d774cf91c8b14b4c5 Signed-off-by: Eric Dumazet Signed-off-by: Joel Fernandes (Google) Cc: David Ahern Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Signed-off-by: Paul E. McKenney (cherry picked from commit 9d40c84cf5bcb5b1d124921ded2056d76be7640d) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318054 Tested-by: Joel Fernandes Reviewed-by: Vineeth Pillai Commit-Queue: Joel Fernandes Reviewed-by: Sean Paul (cherry picked from commit 3c0f4bb182d6b0be5424947b53019e92bea8b38c) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- net/ipv4/devinet.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c511751c2f41..b95ccab89acd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -231,13 +231,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa) call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } +static void in_dev_free_rcu(struct rcu_head *head) +{ + struct in_device *idev = container_of(head, struct in_device, rcu_head); + + kfree(rcu_dereference_protected(idev->mc_hash, 1)); + kfree(idev); +} + void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); - kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? 
dev->name : "NIL"); #endif @@ -245,7 +252,7 @@ void in_dev_finish_destroy(struct in_device *idev) if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else - kfree(idev); + call_rcu(&idev->rcu_head, in_dev_free_rcu); } EXPORT_SYMBOL(in_dev_finish_destroy); @@ -295,12 +302,6 @@ out_kfree: goto out; } -static void in_dev_rcu_put(struct rcu_head *head) -{ - struct in_device *idev = container_of(head, struct in_device, rcu_head); - in_dev_put(idev); -} - static void inetdev_destroy(struct in_device *in_dev) { struct net_device *dev; @@ -325,7 +326,7 @@ static void inetdev_destroy(struct in_device *in_dev) neigh_parms_release(&arp_tbl, in_dev->arp_parms); arp_ifdown(dev); - call_rcu(&in_dev->rcu_head, in_dev_rcu_put); + in_dev_put(in_dev); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) From a4124a21b121fef29df694a8771239a76bb87166 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:23:04 +0000 Subject: [PATCH 86/98] ANDROID: rxrpc: Use call_rcu_hurry() instead of call_rcu() call_rcu() changes to save power may cause slowness. Use the call_rcu_hurry() API instead which reverts to the old behavior. We find this via inspection that the RCU callback does a wakeup of a thread. This usually indicates that something is waiting on it. To be safe, let us use call_rcu_hurry() here instead. [ joel: Upstream is rewriting this code, so I am merging this as a CHROMIUM patch. There is no harm in including it. 
Link: https://lore.kernel.org/rcu/658624.1669849522@warthog.procyon.org.uk/#t ] Bug: 258241771 Bug: 222463781 Test: CQ Signed-off-by: Joel Fernandes (Google) Change-Id: Iaadfe2f9db189489915828c6f2f74522f4b90ea3 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/3965078 Reviewed-by: Ross Zwisler Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318055 Reviewed-by: Vineeth Pillai (cherry picked from commit 1f98f32393f83d14bc290fef06d5b3132bee23e0) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- net/rxrpc/conn_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 22089e37e97f..9c5fae9ca106 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -253,7 +253,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) * must carry a ref on the connection to prevent us getting here whilst * it is queued or running. */ - call_rcu(&conn->rcu, rxrpc_destroy_connection); + call_rcu_hurry(&conn->rcu, rxrpc_destroy_connection); } /* From 84828604c7299ecf420eed959cba81da19fa7fe1 Mon Sep 17 00:00:00 2001 From: Uladzislau Rezki Date: Sun, 16 Oct 2022 16:23:02 +0000 Subject: [PATCH 87/98] UPSTREAM: scsi/scsi_error: Use call_rcu_hurry() instead of call_rcu() Earlier commits in this series allow battery-powered systems to build their kernels with the default-disabled CONFIG_RCU_LAZY=y Kconfig option. This Kconfig option causes call_rcu() to delay its callbacks in order to batch them. This means that a given RCU grace period covers more callbacks, thus reducing the number of grace periods, in turn reducing the amount of energy consumed, which increases battery lifetime which can be a very good thing. This is not a subtle effect: In some important use cases, the battery lifetime is increased by more than 10%. 
This CONFIG_RCU_LAZY=y option is available only for CPUs that offload callbacks, for example, CPUs mentioned in the rcu_nocbs kernel boot parameter passed to kernels built with CONFIG_RCU_NOCB_CPU=y. Delaying callbacks is normally not a problem because most callbacks do nothing but free memory. If the system is short on memory, a shrinker will kick all currently queued lazy callbacks out of their laziness, thus freeing their memory in short order. Similarly, the rcu_barrier() function, which blocks until all currently queued callbacks are invoked, will also kick lazy callbacks, thus enabling rcu_barrier() to complete in a timely manner. However, there are some cases where laziness is not a good option. For example, synchronize_rcu() invokes call_rcu(), and blocks until the newly queued callback is invoked. It would not be a good for synchronize_rcu() to block for ten seconds, even on an idle system. Therefore, synchronize_rcu() invokes call_rcu_hurry() instead of call_rcu(). The arrival of a non-lazy call_rcu_hurry() callback on a given CPU kicks any lazy callbacks that might be already queued on that CPU. After all, if there is going to be a grace period, all callbacks might as well get full benefit from it. Yes, this could be done the other way around by creating a call_rcu_lazy(), but earlier experience with this approach and feedback at the 2022 Linux Plumbers Conference shifted the approach to call_rcu() being lazy with call_rcu_hurry() for the few places where laziness is inappropriate. And another call_rcu() instance that cannot be lazy is the one in the scsi_eh_scmd_add() function. Leaving this instance lazy results in unacceptably slow boot times. Therefore, make scsi_eh_scmd_add() use call_rcu_hurry() in order to revert to the old behavior. [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. 
] Bug: 258241771 Bug: 222463781 Test: CQ Tested-by: Joel Fernandes (Google) Change-Id: I95bba865e582b0a12b1c09ba1f0bd4f897401c07 Signed-off-by: Uladzislau Rezki Signed-off-by: Joel Fernandes (Google) Cc: "James E.J. Bottomley" Cc: Reviewed-by: Bart Van Assche Acked-by: Martin K. Petersen Signed-off-by: Paul E. McKenney (cherry picked from commit 54d87b0a0c19bc3f740e4cd4b87ba14ce2e4ea73) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4318056 Commit-Queue: Joel Fernandes Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai Tested-by: Joel Fernandes (cherry picked from commit 5578f9ac27d25e3e57a5b9c4cf0346cfc5162994) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- drivers/scsi/scsi_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 8ab40c36bb88..ff1060fe44cd 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -314,7 +314,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) * Ensure that all tasks observe the host state change before the * host_failed change. */ - call_rcu(&scmd->rcu, scsi_eh_inc_host_failed); + call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed); } /** From 5b47d8411d6cd0de35216a9edadedf4d12387715 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 2 Jun 2022 10:06:43 +0200 Subject: [PATCH 88/98] UPSTREAM: rcu/kvfree: Remove useless monitor_todo flag monitor_todo is not needed as the work struct already tracks if work is pending. Just use that to know if work is pending using schedule_delayed_work() helper. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. 
McKenney Reviewed-by: Neeraj Upadhyay (cherry picked from commit 82d26c36cc68e781400eb4e541f943008208f2d6) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I4c13f89da735a628a5030ab55a13e338b97da4b8 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4332176 Reviewed-by: Sean Paul Reviewed-by: Vineeth Pillai (cherry picked from commit bb867be28d6a70b36ff1d6563f794c489072ab7e) [Minor conflict with 71cf9c983515549999229ba240e61fa20b471dae where it added a new function in the same location. Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1f7f05aa11a5..78deb6a7844f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3168,7 +3168,6 @@ struct kfree_rcu_cpu_work { * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period * @lock: Synchronize access to this structure * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES - * @monitor_todo: Tracks whether a @monitor_work delayed work is pending * @initialized: The @rcu_work fields have been initialized * @count: Number of objects for which GP not started * @bkvcache: @@ -3193,7 +3192,6 @@ struct kfree_rcu_cpu { struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES]; raw_spinlock_t lock; struct delayed_work monitor_work; - bool monitor_todo; bool initialized; int count; @@ -3452,9 +3450,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // of the channels that is still busy we should rearm the // work to repeat an attempt. Because previous batches are // still in progress. 
- if (!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) - krcp->monitor_todo = false; - else + if (need_offload_krc(krcp)) schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); raw_spin_unlock_irqrestore(&krcp->lock, flags); @@ -3651,11 +3647,8 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) kmemleak_ignore(ptr); // Set timer to drain after KFREE_DRAIN_JIFFIES. - if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && - !krcp->monitor_todo) { - krcp->monitor_todo = true; + if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); - } unlock_return: krc_this_cpu_unlock(krcp, flags); @@ -3730,14 +3723,8 @@ void __init kfree_rcu_scheduler_running(void) struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); raw_spin_lock_irqsave(&krcp->lock, flags); - if ((!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) || - krcp->monitor_todo) { - raw_spin_unlock_irqrestore(&krcp->lock, flags); - continue; - } - krcp->monitor_todo = true; - schedule_delayed_work_on(cpu, &krcp->monitor_work, - KFREE_DRAIN_JIFFIES); + if (need_offload_krc(krcp)) + schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES); raw_spin_unlock_irqrestore(&krcp->lock, flags); } } From 88587c18386742c9dfe51e2e29065430bce3b26d Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Thu, 30 Jun 2022 18:33:35 +0200 Subject: [PATCH 89/98] UPSTREAM: rcu/kvfree: Update KFREE_DRAIN_JIFFIES interval Currently the monitor work is scheduled with a fixed interval of HZ/20, which is roughly 50 milliseconds. The drawback of this approach is low utilization of the 512 page slots in scenarios with infrequent kvfree_rcu() calls. For example on an Android system: kworker/3:3-507 [003] .... 470.286305: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d0f0dde5 nr_records=6 kworker/6:1-76 [006] .... 
470.416613: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000ea0d6556 nr_records=1 kworker/6:1-76 [006] .... 470.416625: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000003e025849 nr_records=9 kworker/3:3-507 [003] .... 471.390000: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000815a8713 nr_records=48 kworker/1:1-73 [001] .... 471.725785: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000fda9bf20 nr_records=3 kworker/1:1-73 [001] .... 471.725833: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000a425b67b nr_records=76 kworker/0:4-1411 [000] .... 472.085673: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000007996be9d nr_records=1 kworker/0:4-1411 [000] .... 472.085728: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d0f0dde5 nr_records=5 kworker/6:1-76 [006] .... 472.260340: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000065630ee4 nr_records=102 In many cases, out of 512 slots, fewer than 10 were actually used. In order to improve batching and make utilization more efficient this commit sets a drain interval to a fixed 5-seconds interval. Floods are detected when a page fills quickly, and in that case, the reclaim work is re-scheduled for the next scheduling-clock tick (jiffy). After this change: kworker/7:1-371 [007] .... 5630.725708: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000005ab0ffb3 nr_records=121 kworker/7:1-371 [007] .... 5630.989702: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000060c84761 nr_records=47 kworker/7:1-371 [007] .... 5630.989714: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000000babf308 nr_records=510 kworker/7:1-371 [007] .... 5631.553790: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000bb7bd0ef nr_records=169 kworker/7:1-371 [007] .... 5631.553808: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x0000000044c78753 nr_records=510 kworker/5:6-9428 [005] .... 
5631.746102: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000d98519aa nr_records=123 kworker/4:7-9434 [004] .... 5632.001758: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x00000000526c9d44 nr_records=322 kworker/4:7-9434 [004] .... 5632.002073: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000002c6a8afa nr_records=185 kworker/7:1-371 [007] .... 5632.277515: rcu_invoke_kfree_bulk_callback: rcu_preempt bulk=0x000000007f4a962f nr_records=510 Here, in all but one of the cases, more than one hundred slots were used, representing an order-of-magnitude improvement. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney (cherry picked from commit 51824b780b719c53113dc39e027fbf670dc66028) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I4635ba0dbece4e029d5271ef3950b8eaa1ae5e81 Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4332177 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul (cherry picked from commit b1bf359877e084383be107bf0008d58d0a6b15e3) [Conflict due to 71cf9c983515549999229ba240e61fa20b471dae adding a new function in the same location. Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 78deb6a7844f..ed2dce3f0176 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3122,7 +3122,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) EXPORT_SYMBOL_GPL(call_rcu); /* Maximum number of jiffies to wait before draining a batch. 
*/ -#define KFREE_DRAIN_JIFFIES (HZ / 50) +#define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 #define FREE_N_CHANNELS 2 @@ -3395,6 +3395,21 @@ need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp) return !!krwp->head_free; } +static void +schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) +{ + long delay, delay_left; + + delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES; + if (delayed_work_pending(&krcp->monitor_work)) { + delay_left = krcp->monitor_work.timer.expires - jiffies; + if (delay < delay_left) + mod_delayed_work(system_wq, &krcp->monitor_work, delay); + return; + } + queue_delayed_work(system_wq, &krcp->monitor_work, delay); +} + /* * This function is invoked after the KFREE_DRAIN_JIFFIES timeout. */ @@ -3451,7 +3466,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // work to repeat an attempt. Because previous batches are // still in progress. if (need_offload_krc(krcp)) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } @@ -3648,7 +3663,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) // Set timer to drain after KFREE_DRAIN_JIFFIES. 
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); unlock_return: krc_this_cpu_unlock(krcp, flags); @@ -3724,7 +3739,7 @@ void __init kfree_rcu_scheduler_running(void) raw_spin_lock_irqsave(&krcp->lock, flags); if (need_offload_krc(krcp)) - schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } } From 5d1a3986c20e8b28742e804a4a86c9942ac4e8ae Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 22 Jun 2022 22:51:02 +0000 Subject: [PATCH 90/98] UPSTREAM: rcu/kfree: Fix kfree_rcu_shrink_count() return value As per the comments in include/linux/shrinker.h, .count_objects callback should return the number of freeable items, but if there are no objects to free, SHRINK_EMPTY should be returned. The only time 0 is returned should be when we are unable to determine the number of objects, or the cache should be skipped for another reason. Signed-off-by: Joel Fernandes (Google) Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney (cherry picked from commit 38269096351806bf7315f971c53205b676ada259) Bug: 258241771 Bug: 222463781 Test: CQ Change-Id: I5cb380fceaccc85971a47773d9058f0ea044c6dd Signed-off-by: Joel Fernandes Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4332178 Reviewed-by: Vineeth Pillai Reviewed-by: Sean Paul (cherry picked from commit 3243f1e22bf915c9b805a96cc4a8cbc03ed5d7a8) [Cherry picked from chromeos-5.15 tree. 
Minor tweaks to commit message to match Android style] Signed-off-by: Qais Yousef --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ed2dce3f0176..5f0510e08c5b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3696,7 +3696,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) atomic_set(&krcp->backoff_page_cache_fill, 1); } - return count; + return count == 0 ? SHRINK_EMPTY : count; } static unsigned long From 16ea06fe441069eee38d16662975e8cd86572d69 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 29 Nov 2022 16:58:21 +0100 Subject: [PATCH 91/98] UPSTREAM: rcu/kvfree: Move need_offload_krc() out of krcp->lock The need_offload_krc() function currently holds the krcp->lock in order to safely check krcp->head. This commit removes the need for this lock in that function by updating the krcp->head pointer using WRITE_ONCE() macro so that readers can carry out lockless loads of that pointer. Bug: 258241771 Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney (cherry picked from commit 8fc5494ad5face62747a3937db66b00db1e5d80b) Signed-off-by: Qais Yousef Change-Id: Iddde5ec15e8574216abc95d8c64efa5c66868508 --- kernel/rcu/tree.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5f0510e08c5b..aa84ee9f1830 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3446,7 +3446,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // objects queued on the linked list. if (!krwp->head_free) { krwp->head_free = krcp->head; - krcp->head = NULL; + WRITE_ONCE(krcp->head, NULL); } WRITE_ONCE(krcp->count, 0); @@ -3460,6 +3460,8 @@ static void kfree_rcu_monitor(struct work_struct *work) } } + raw_spin_unlock_irqrestore(&krcp->lock, flags); + // If there is nothing to detach, it means that our job is // successfully done here. 
In case of having at least one // of the channels that is still busy we should rearm the @@ -3467,8 +3469,6 @@ static void kfree_rcu_monitor(struct work_struct *work) // still in progress. if (need_offload_krc(krcp)) schedule_delayed_monitor_work(krcp); - - raw_spin_unlock_irqrestore(&krcp->lock, flags); } static enum hrtimer_restart @@ -3647,7 +3647,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) head->func = func; head->next = krcp->head; - krcp->head = head; + WRITE_ONCE(krcp->head, head); success = true; } @@ -3732,15 +3732,12 @@ static struct shrinker kfree_rcu_shrinker = { void __init kfree_rcu_scheduler_running(void) { int cpu; - unsigned long flags; for_each_possible_cpu(cpu) { struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); - raw_spin_lock_irqsave(&krcp->lock, flags); if (need_offload_krc(krcp)) schedule_delayed_monitor_work(krcp); - raw_spin_unlock_irqrestore(&krcp->lock, flags); } } From 4adb60810c9f4092f53e5ddc4c1f6a0bf0a97838 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 3 Mar 2023 21:38:51 +0000 Subject: [PATCH 92/98] ANDROID: rcu: Add a minimum time for marking boot as completed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On many systems, a great deal of boot (in userspace) happens after the kernel thinks the boot has completed. It is difficult to determine if the system has really booted from the kernel side. Some features like lazy-RCU can risk slowing down boot time if, say, a callback has been added that the boot synchronously depends on. Further expedited callbacks can get unexpedited way earlier than it should be, thus slowing down boot (as shown in the data below). For these reasons, this commit adds a config option 'CONFIG_RCU_BOOT_END_DELAY' and a boot parameter rcupdate.boot_end_delay. 
Userspace can also make RCU's view of the system as booted, by writing the time in milliseconds to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay Or even just writing a value of 0 to this sysfs node. However, under no circumstance will the boot be allowed to end earlier than just before init is launched. The default value of CONFIG_RCU_BOOT_END_DELAY is chosen as 15s. This suits ChromeOS and also a PREEMPT_RT system below very well, which need no config or parameter changes, and just a simple application of this patch. A system designer can also choose a specific value here to keep RCU from marking boot completion. As noted earlier, RCU's perspective of the system as booted will not be marked until at least android_rcu_boot_end_delay milliseconds have passed or an update is made via writing a small value (or 0) in milliseconds to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay. One side-effect of this patch is, there is a risk that a real-time workload launched just after the kernel boots will suffer interruptions due to expedited RCU, which previously ended just before init was launched. However, to mitigate such an issue (however unlikely), the user should either tune CONFIG_RCU_BOOT_END_DELAY to a smaller value than 15 seconds or write a value of 0 to /sys/module/rcupdate/parameters/android_rcu_boot_end_delay, once userspace boots, and before launching the real-time workload. Qiuxu also noted impressive boot-time improvements with an earlier version of this patch. An excerpt from the data he shared: 1) Testing environment: OS : CentOS Stream 8 (non-RT OS) Kernel : v6.2 Machine : Intel Cascade Lake server (2 sockets, each with 44 logical threads) Qemu args : -cpu host -enable-kvm, -smp 88,threads=2,sockets=2, … 2) OS boot time definition: The time from the start of the kernel boot to the shell command line prompt is shown from the console. [ Different people may have different OS boot time definitions. 
] 3) Measurement method (very rough method): A timer in the kernel periodically prints the boot time every 100ms. As soon as the shell command line prompt is shown from the console, we record the boot time printed by the timer, then the printed boot time is the OS boot time. 4) Measured OS boot time (in seconds) a) Measured 10 times w/o this patch: 8.7s, 8.4s, 8.6s, 8.2s, 9.0s, 8.7s, 8.8s, 9.3s, 8.8s, 8.3s The average OS boot time was: ~8.7s b) Measured 10 times w/ this patch: 8.5s, 8.2s, 7.6s, 8.2s, 8.7s, 8.2s, 7.8s, 8.2s, 9.3s, 8.4s The average OS boot time was: ~8.3s. (CHROMIUM tag rationale: Submitted upstream but got lots of pushback as it may harm a PREEMPT_RT system -- the concern is VERY theoretical and this improves things for ChromeOS. Plus we are not a PREEMPT_RT system. So I am strongly suggesting this mostly simple change for ChromeOS.) Bug: 258241771 Bug: 268129466 Test: boot Tested-by: Qiuxu Zhuo Change-Id: Ibd262189d7f92dbcc57f1508efe90fcfba95a6cc Signed-off-by: Joel Fernandes (Google) Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4350228 Commit-Queue: Joel Fernandes Commit-Queue: Vineeth Pillai Tested-by: Vineeth Pillai Tested-by: Joel Fernandes Reviewed-by: Vineeth Pillai (cherry picked from commit 7968079ec77b320ee9d4115fe13048a8f7afbc02) [Cherry picked from chromeos-5.15 tree. Minor tweaks to commit message to match Android style. Prefix boot param with android_] Signed-off-by: Qais Yousef --- .../admin-guide/kernel-parameters.txt | 15 ++++ kernel/rcu/Kconfig | 21 +++++ kernel/rcu/update.c | 76 ++++++++++++++++++- 3 files changed, 110 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ec80358a9faf..5d10bc873b37 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4881,6 +4881,21 @@ rcutorture.verbose= [KNL] Enable additional printk() statements. 
+ rcupdate.android_rcu_boot_end_delay= [KNL] + Minimum time in milliseconds from the start of boot + that must elapse before the boot sequence can be marked + complete from RCU's perspective, after which RCU's + behavior becomes more relaxed. The default value is also + configurable via CONFIG_RCU_BOOT_END_DELAY. + Userspace can also mark the boot as completed + sooner by writing the time in milliseconds, say once + userspace considers the system as booted, to: + /sys/module/rcupdate/parameters/android_rcu_boot_end_delay + Or even just writing a value of 0 to this sysfs node. + The sysfs node can also be used to extend the delay + to be larger than the default, assuming the marking + of boot complete has not yet occurred. + rcupdate.rcu_cpu_stall_ftrace_dump= [KNL] Dump ftrace buffer after reporting RCU CPU stall warning. diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 8e31e315a6f5..f0d0df3beedc 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -282,4 +282,25 @@ config RCU_LAZY To save power, batch RCU callbacks and flush after delay, memory pressure, or callback list growing too big. +config RCU_BOOT_END_DELAY + int "Minimum time before RCU may consider in-kernel boot as completed" + range 0 120000 + default 20000 + help + Default value of the minimum time in milliseconds from the start of boot + that must elapse before the boot sequence can be marked complete from RCU's + perspective, after which RCU's behavior becomes more relaxed. + Userspace can also mark the boot as completed sooner than this default + by writing the time in milliseconds, say once userspace considers + the system as booted, to: /sys/module/rcupdate/parameters/rcu_boot_end_delay. + Or even just writing a value of 0 to this sysfs node. The sysfs node can + also be used to extend the delay to be larger than the default, assuming + the marking of boot completion has not yet occurred. 
+ + The actual delay for RCU's view of the system to be marked as booted can be + higher than this value if the kernel takes a long time to initialize but it + will never be smaller than this value. + + Accept the default if unsure. + endmenu # "RCU Subsystem" diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a90458c7b4f2..699344c50506 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -43,6 +43,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS @@ -224,13 +225,51 @@ void rcu_unexpedite_gp(void) } EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); +/* + * Minimum time in milliseconds from the start boot until RCU can consider + * in-kernel boot as completed. This can also be tuned at runtime to end the + * boot earlier, by userspace init code writing the time in milliseconds (even + * 0) to: /sys/module/rcupdate/parameters/android_rcu_boot_end_delay. The sysfs + * node can also be used to extend the delay to be larger than the default, + * assuming the marking of boot complete has not yet occurred. + */ +static int android_rcu_boot_end_delay = CONFIG_RCU_BOOT_END_DELAY; + static bool rcu_boot_ended __read_mostly; +static bool rcu_boot_end_called __read_mostly; +static DEFINE_MUTEX(rcu_boot_end_lock); /* - * Inform RCU of the end of the in-kernel boot sequence. + * Inform RCU of the end of the in-kernel boot sequence. The boot sequence will + * not be marked ended until at least android_rcu_boot_end_delay milliseconds + * have passed. */ -void rcu_end_inkernel_boot(void) +void rcu_end_inkernel_boot(void); +static void rcu_boot_end_work_fn(struct work_struct *work) { + rcu_end_inkernel_boot(); +} +static DECLARE_DELAYED_WORK(rcu_boot_end_work, rcu_boot_end_work_fn); + +/* Must be called with rcu_boot_end_lock held. 
*/ +static void rcu_end_inkernel_boot_locked(void) +{ + rcu_boot_end_called = true; + + if (rcu_boot_ended) + return; + + if (android_rcu_boot_end_delay) { + u64 boot_ms = div_u64(ktime_get_boot_fast_ns(), 1000000UL); + + if (boot_ms < android_rcu_boot_end_delay) { + schedule_delayed_work(&rcu_boot_end_work, + msecs_to_jiffies(android_rcu_boot_end_delay - boot_ms)); + return; + } + } + + cancel_delayed_work(&rcu_boot_end_work); rcu_unexpedite_gp(); rcu_async_relax(); if (rcu_normal_after_boot) @@ -238,6 +277,39 @@ void rcu_end_inkernel_boot(void) rcu_boot_ended = true; } +void rcu_end_inkernel_boot(void) +{ + mutex_lock(&rcu_boot_end_lock); + rcu_end_inkernel_boot_locked(); + mutex_unlock(&rcu_boot_end_lock); +} + +static int param_set_rcu_boot_end(const char *val, const struct kernel_param *kp) +{ + uint end_ms; + int ret = kstrtouint(val, 0, &end_ms); + + if (ret) + return ret; + /* + * rcu_end_inkernel_boot() should be called at least once during init + * before we can allow param changes to end the boot. + */ + mutex_lock(&rcu_boot_end_lock); + android_rcu_boot_end_delay = end_ms; + if (!rcu_boot_ended && rcu_boot_end_called) { + rcu_end_inkernel_boot_locked(); + } + mutex_unlock(&rcu_boot_end_lock); + return ret; +} + +static const struct kernel_param_ops rcu_boot_end_ops = { + .set = param_set_rcu_boot_end, + .get = param_get_uint, +}; +module_param_cb(android_rcu_boot_end_delay, &rcu_boot_end_ops, &android_rcu_boot_end_delay, 0644); + /* * Let rcutorture know when it is OK to turn it up to eleven. */ From 37b02c190cbf74022651e3f5f8240c13796ad37c Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Sun, 3 Dec 2023 01:12:52 +0000 Subject: [PATCH 93/98] FROMLIST: rcu: Provide a boot time parameter to control lazy RCU To allow more flexible arrangements while still provide a single kernel for distros, provide a boot time parameter to enable/disable lazy RCU. 
Specify: rcutree.enable_rcu_lazy=[y|1|n|0] Which also requires rcu_nocbs=all at boot time to enable/disable lazy RCU. To disable it by default at build time when CONFIG_RCU_LAZY=y, the new CONFIG_RCU_LAZY_DEFAULT_OFF can be used. Bug: 258241771 Signed-off-by: Qais Yousef (Google) Tested-by: Andrea Righi Signed-off-by: Paul E. McKenney Link: https://lore.kernel.org/lkml/20231203011252.233748-1-qyousef@layalina.io/ [Fix trivial conflicts rejecting newer code that doesn't exist on 5.15] Signed-off-by: Qais Yousef Change-Id: Ib5585ae717a2ba7749f2802101b785c4e5de8a90 --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ kernel/rcu/Kconfig | 12 ++++++++++++ kernel/rcu/tree.c | 7 ++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5d10bc873b37..6d653ef073ad 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4637,6 +4637,11 @@ rcu_node tree with an eye towards determining why a new grace period has not yet started. + rcutree.enable_rcu_lazy= [KNL] + To save power, batch RCU callbacks and flush after + delay, memory pressure or callback list growing too + big. + rcuscale.gp_async= [KNL] Measure performance of asynchronous grace-period primitives such as call_rcu(). diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index f0d0df3beedc..4bc5b79ce1df 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -281,6 +281,18 @@ config RCU_LAZY help To save power, batch RCU callbacks and flush after delay, memory pressure, or callback list growing too big. + Requires rcu_nocbs=all to be set. + + Use rcutree.enable_rcu_lazy=0 to turn it off at boot time. + +config RCU_LAZY_DEFAULT_OFF + bool "Turn RCU lazy invocation off by default" + depends on RCU_LAZY + default n + help + Allows building the kernel with CONFIG_RCU_LAZY=y yet keep it default + off. 
Boot time param rcutree.enable_rcu_lazy=1 can be used to switch + it back on. config RCU_BOOT_END_DELAY int "Minimum time before RCU may consider in-kernel boot as completed" diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index aa84ee9f1830..ed0756afdf20 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3044,6 +3044,9 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) } #ifdef CONFIG_RCU_LAZY +static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF); +module_param(enable_rcu_lazy, bool, 0444); + /** * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and * flush all lazy callbacks (including the new one) to the main ->cblist while @@ -3069,6 +3072,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) return __call_rcu_common(head, func, false); } EXPORT_SYMBOL_GPL(call_rcu_hurry); +#else +#define enable_rcu_lazy false #endif /** @@ -3117,7 +3122,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry); */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); + __call_rcu_common(head, func, enable_rcu_lazy); } EXPORT_SYMBOL_GPL(call_rcu); From d38091b4ff772245bf965f21172024b3660c3be0 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 23 May 2023 19:22:19 +0000 Subject: [PATCH 94/98] ANDROID: Enable CONFIG_LAZY_RCU in arm64 gki_defconfig It is still disabled by default. Must specify rcutree.android_enable_rcu_lazy and rcu_nocbs=all in boot time parameter to actually enable it. 
Bug: 258241771 Change-Id: I11c920aa5edde2fc42ab54245cd198eb8cb47616 Signed-off-by: Qais Yousef --- arch/arm64/configs/gki_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 733f1f2da58a..1a84753c54d9 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -15,6 +15,8 @@ CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_BOOST=y CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_LAZY=y +CONFIG_RCU_LAZY_DEFAULT_OFF=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=m From ae67f18944e363bcf4822fd0a39fcc41c911d8c9 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Fri, 19 Jan 2024 10:52:57 +0000 Subject: [PATCH 95/98] ANDROID: Enable CONFIG_LAZY_RCU in x86 gki_defconfig It is still disabled by default. Must specify rcutree.android_enable_rcu_lazy and rcu_nocbs=all in boot time parameter to actually enable it. Bug: 258241771 Change-Id: Ic9e15b846d58ffa3d5dd81842c568da79352ff2d Signed-off-by: Qais Yousef --- arch/x86/configs/gki_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 2e3d924152bc..422d4ba05d4f 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -17,6 +17,8 @@ CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_BOOST=y CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_LAZY=y +CONFIG_RCU_LAZY_DEFAULT_OFF=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=m From ae44e8dac85115b05699c06e33ab337c0e82cf1e Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Fri, 19 Apr 2024 14:41:35 -0700 Subject: [PATCH 96/98] ANDROID: 16K: Only madvise padding from dynamic linker context Only preform padding advise from the execution context on bionic's dynamic linker. This ensures that madvise() doesn't have unwanted side effects. Also rearrange the order of fail checks in madvise_vma_pad_pages() in order of ascending cost. 
Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: I3e05b8780c6eda78007f86b613f8c11dd18ac28f Signed-off-by: Kalesh Singh --- mm/pgsize_migration.c | 75 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index b7264f49a9cb..aecc109524c3 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,56 @@ static __always_inline bool str_has_suffix(const char *str, const char *suffix) return !strncmp(str + str_len - suffix_len, suffix, suffix_len); } +/* + * The dynamic linker, or interpreter, operates within the process context + * of the binary that necessitated dynamic linking. + * + * Consequently, process context identifiers; like PID, comm, ...; cannot + * be used to differentiate whether the execution context belongs to the + * dynamic linker or not. + * + * linker_ctx() deduces whether execution is currently in the dynamic linker's + * context by correlating the current userspace instruction pointer with the + * VMAs of the current task. + * + * Returns true if in linker context, otherwise false. + * + * Caller must hold mmap lock in read mode. 
+ */ +static inline bool linker_ctx(void) +{ + struct pt_regs *regs = task_pt_regs(current); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct file *file; + + if (!regs) + return false; + + vma = find_vma(mm, instruction_pointer(regs)); + + /* Current execution context, the VMA must be present */ + BUG_ON(!vma); + + file = vma->vm_file; + if (!file) + return false; + + if ((vma->vm_flags & VM_EXEC)) { + char buf[64]; + const int bufsize = sizeof(buf); + char *path; + + memset(buf, 0, bufsize); + path = d_path(&file->f_path, buf, bufsize); + + if (!strcmp(path, "/system/bin/linker64")) + return true; + } + + return false; +} + /* * Saves the number of padding pages for an ELF segment mapping * in vm_flags. @@ -146,6 +197,7 @@ static __always_inline bool str_has_suffix(const char *str, const char *suffix) * 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES. * 5) The VMA is a regular file backed VMA (filemap_fault) * 6) The file backing the VMA is a shared library (*.so) + * 7) The madvise was requested by bionic's dynamic linker. */ void madvise_vma_pad_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -155,18 +207,9 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, if (!is_pgsize_migration_enabled()) return; - /* Only handle this for file backed VMAs */ - if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) - return; - - - /* Limit this to only shared libraries (*.so) */ - if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so")) - return; - /* * If the madvise range is it at the end of the file save the number of - * pages in vm_flags (only need 4 bits are needed for 16kB aligned ELFs). + * pages in vm_flags (only 4 bits are needed for up to 64kB aligned ELFs).
*/ if (start <= vma->vm_start || end != vma->vm_end) return; @@ -176,6 +219,18 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES) return; + /* Only handle this for file backed VMAs */ + if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) + return; + + /* Limit this to only shared libraries (*.so) */ + if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so")) + return; + + /* Only bionic's dynamic linker needs to hint padding pages. */ + if (!linker_ctx()) + return; + vma_set_pad_pages(vma, nr_pad_pages); } From 19d6e7eb47dc0aeffc4a3b50ad9b65deb594a211 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 25 Apr 2024 09:59:08 -0700 Subject: [PATCH 97/98] ANDROID: 16K: madvise_vma_pad_pages: Remove filemap_fault check Some file systems like F2FS use a custom filemap_fault ops. Remove this check, as checking vm_file is sufficient. Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: Id6a584d934f06650c0a95afd1823669fc77ba2c2 Signed-off-by: Kalesh Singh --- mm/pgsize_migration.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index aecc109524c3..f148918ee8f7 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -195,7 +195,7 @@ static inline bool linker_ctx(void) * 2) The range ends at the end address of the VMA * 3) The range starts at an address greater than the start address of the VMA * 4) The number of the pages in the range does not exceed VM_TOTAL_PAD_PAGES. - * 5) The VMA is a regular file backed VMA (filemap_fault) + * 5) The VMA is a file backed VMA. * 6) The file backing the VMA is a shared library (*.so) * 7) The madvise was requested by bionic's dynamic linker. 
*/ @@ -220,7 +220,7 @@ void madvise_vma_pad_pages(struct vm_area_struct *vma, return; /* Only handle this for file backed VMAs */ - if (!vma->vm_file || !vma->vm_ops || vma->vm_ops->fault != filemap_fault) + if (!vma->vm_file) return; /* Limit this to only shared libraries (*.so) */ From d83231efe4bfcdee684acd7eb4f1cada88517b13 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 22 Apr 2024 14:24:59 -0700 Subject: [PATCH 98/98] ANDROID: 16K: Handle pad VMA splits and merges In some cases a VMA with padding representation may be split, and therefore the padding flags must be updated accordingly. There are 3 cases to handle: Given: | DDDDPPPP | where: - D represents 1 page of data; - P represents 1 page of padding; - | represents the boundaries (start/end) of the VMA 1) Split exactly at the padding boundary | DDDDPPPP | --> | DDDD | PPPP | - Remove padding flags from the first VMA. - The second VMA is all padding 2) Split within the padding area | DDDDPPPP | --> | DDDDPP | PP | - Subtract the length of the second VMA from the first VMA's padding. - The second VMA is all padding, adjust its padding length (flags) 3) Split within the data area | DDDDPPPP | --> | DD | DDPPPP | - Remove padding flags from the first VMA. - The second VMA has the same padding as before the split. To simplify the semantics, merging of padding VMAs is not allowed. If a split produces a VMA that is entirely padding, show_[s]maps() only outputs the padding VMA entry (as the data entry is of length 0).
Bug: 330117029 Bug: 327600007 Bug: 330767927 Bug: 328266487 Bug: 329803029 Change-Id: Ie2628ced5512e2c7f8af25fabae1f38730c8bb1a Signed-off-by: Kalesh Singh --- fs/proc/task_mmu.c | 7 +++- include/linux/pgsize_migration.h | 34 +++++++++++++++ mm/mlock.c | 3 +- mm/mmap.c | 7 +++- mm/mprotect.c | 4 +- mm/pgsize_migration.c | 72 +++++++++++++++++++++++++++++++- 6 files changed, 121 insertions(+), 6 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 095aa7c80ee1..0b7f73653ae3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -348,7 +348,8 @@ static int show_map(struct seq_file *m, void *v) struct vm_area_struct *pad_vma = get_pad_vma(v); struct vm_area_struct *vma = get_data_vma(v); - show_map_vma(m, vma); + if (vma_pages(vma)) + show_map_vma(m, vma); show_map_pad_vma(vma, pad_vma, m, show_map_vma); @@ -851,6 +852,9 @@ static int show_smap(struct seq_file *m, void *v) memset(&mss, 0, sizeof(mss)); + if (!vma_pages(vma)) + goto show_pad; + smap_gather_stats(vma, &mss, 0); show_map_vma(m, vma); @@ -869,6 +873,7 @@ static int show_smap(struct seq_file *m, void *v) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); +show_pad: show_map_pad_vma(vma, pad_vma, m, (show_pad_vma_fn)show_smap); return 0; diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index 7ab0f288bcf9..5c47ec28ea7d 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -61,6 +61,9 @@ extern struct vm_area_struct *get_data_vma(struct vm_area_struct *vma); extern void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, struct seq_file *m, show_pad_vma_fn func); + +extern void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int new_below); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct *vma, unsigned long nr_pages) @@ -92,10 +95,41 @@ static inline void 
show_map_pad_vma(struct vm_area_struct *vma, struct seq_file *m, show_pad_vma_fn func) { } + +static inline void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int new_below) +{ +} #endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ static inline unsigned long vma_data_pages(struct vm_area_struct *vma) { return vma_pages(vma) - vma_pad_pages(vma); } + +/* + * Sets the correct padding bits / flags for a VMA split. + */ +static inline unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma, + unsigned long newflags) +{ + if (newflags & VM_PAD_MASK) + return (newflags & ~VM_PAD_MASK) | (vma->vm_flags & VM_PAD_MASK); + else + return newflags; +} + +/* + * Merging of padding VMAs is uncommon, as padding is only allowed + * from the linker context. + * + * To simplify the semantics, adjacent VMAs with padding are not + * allowed to merge. + */ +static inline bool is_mergable_pad_vma(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + /* Padding VMAs cannot be merged with other padding or real VMAs */ + return !((vma->vm_flags | vm_flags) & VM_PAD_MASK); +} #endif /* _LINUX_PAGE_SIZE_MIGRATION_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 0cc7fe053755..eec2418f3336 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -547,7 +548,7 @@ success: */ if (lock) - vma->vm_flags = newflags; + vma->vm_flags = vma_pad_fixup_flags(vma, newflags); else munlock_vma_pages_range(vma, start, end); diff --git a/mm/mmap.c b/mm/mmap.c index e3a10b3cc6be..e78cf663e559 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1053,6 +1054,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma, return 0; if (!anon_vma_name_eq(anon_vma_name(vma), anon_name)) return 0; + if (!is_mergable_pad_vma(vma, vm_flags)) + return 0; return 1; } @@ -2778,8 +2781,10 @@ int __split_vma(struct 
mm_struct *mm, struct vm_area_struct *vma, err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); /* Success. */ - if (!err) + if (!err) { + split_pad_vma(vma, new, addr, new_below); return 0; + } /* Clean everything up if vma_adjust failed. */ if (new->vm_ops && new->vm_ops->close) diff --git a/mm/mprotect.c b/mm/mprotect.c index ba53529cdd5e..027cf7c10ce4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -490,7 +491,8 @@ success: * vm_flags and vm_page_prot are protected by the mmap_lock * held in write mode. */ - vma->vm_flags = newflags; + vma->vm_flags = vma_pad_fixup_flags(vma, newflags); + dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot); vma_set_page_prot(vma); diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index f148918ee8f7..79c5e26aa141 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -113,6 +113,7 @@ void vma_set_pad_pages(struct vm_area_struct *vma, if (!is_pgsize_migration_enabled()) return; + vma->vm_flags &= ~VM_PAD_MASK; vma->vm_flags |= (nr_pages << VM_PAD_SHIFT); } @@ -268,10 +269,10 @@ struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma) pad->vm_start = VMA_PAD_START(pad); /* Make the pad vma PROT_NONE */ - pad->vm_flags = pad->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); + pad->vm_flags &= ~(VM_READ|VM_WRITE|VM_EXEC); /* Remove padding bits */ - pad->vm_flags = pad->vm_flags & ~VM_PAD_MASK; + pad->vm_flags &= ~VM_PAD_MASK; return pad; } @@ -324,5 +325,72 @@ void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad, kfree(pad); kfree(vma); } + +/* + * When splitting a padding VMA there are a couple of cases to handle. 
+ * + * Given: + * + * | DDDDPPPP | + * + * where: + * - D represents 1 page of data; + * - P represents 1 page of padding; + * - | represents the boundaries (start/end) of the VMA + * + * + * 1) Split exactly at the padding boundary + * + * | DDDDPPPP | --> | DDDD | PPPP | + * + * - Remove padding flags from the first VMA. + * - The second VMA is all padding + * + * 2) Split within the padding area + * + * | DDDDPPPP | --> | DDDDPP | PP | + * + * - Subtract the length of the second VMA from the first VMA's padding. + * - The second VMA is all padding, adjust its padding length (flags) + * + * 3) Split within the data area + * + * | DDDDPPPP | --> | DD | DDPPPP | + * + * - Remove padding flags from the first VMA. + * - The second VMA has the same padding as before the split. + */ +void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int new_below) +{ + unsigned long nr_pad_pages = vma_pad_pages(vma); + unsigned long nr_vma2_pages; + struct vm_area_struct *first; + struct vm_area_struct *second; + + if (!nr_pad_pages) + return; + + if (new_below) { + first = new; + second = vma; + } else { + first = vma; + second = new; + } + + nr_vma2_pages = vma_pages(second); + + if (nr_vma2_pages == nr_pad_pages) { /* Case 1 */ + first->vm_flags &= ~VM_PAD_MASK; + vma_set_pad_pages(second, nr_pad_pages); + } else if (nr_vma2_pages < nr_pad_pages) { /* Case 2 */ + vma_set_pad_pages(first, nr_pad_pages - nr_vma2_pages); + vma_set_pad_pages(second, nr_vma2_pages); + } else { /* Case 3 */ + first->vm_flags &= ~VM_PAD_MASK; + vma_set_pad_pages(second, nr_pad_pages); + } +} #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */