From 1cf970483e1f1d7603d235fc84aac40098bf243d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 13 Jan 2023 11:12:15 +0000 Subject: [PATCH 001/174] mm/page_alloc: explicitly define what alloc flags deplete min reserves [ Upstream commit ab3508854353793cd35e348fde89a5c09b2fd8b5 ] As there are more ALLOC_ flags that affect reserves, define what flags affect reserves and clarify the effect of each flag. Link: https://lkml.kernel.org/r/20230113111217.14134-5-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: NeilBrown Cc: Thierry Reding Signed-off-by: Andrew Morton Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") Signed-off-by: Sasha Levin --- mm/internal.h | 3 +++ mm/page_alloc.c | 36 +++++++++++++++++++++++------------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index f0f6198462cc..cd095ce2f199 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -768,6 +768,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ +/* Flags that allow allocations below the min watermark. */ +#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) + enum ttu_flags; struct tlbflush_unmap_batch; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e1f4d779b26..6ab53e47ccea 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3956,15 +3956,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); static inline long __zone_watermark_unusable_free(struct zone *z, unsigned int order, unsigned int alloc_flags) { - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); long unusable_free = (1 << order) - 1; /* - * If the caller does not have rights to ALLOC_HARDER then subtract - * the high-atomic reserves. This will over-estimate the size of the - * atomic reserve but it avoids a search. + * If the caller does not have rights to reserves below the min + * watermark then subtract the high-atomic reserves. This will + * over-estimate the size of the atomic reserve but it avoids a search. */ - if (likely(!alloc_harder)) + if (likely(!(alloc_flags & ALLOC_RESERVES))) unusable_free += z->nr_reserved_highatomic; #ifdef CONFIG_CMA @@ -3988,25 +3987,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, { long min = mark; int o; - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); /* free_pages may go negative - that's OK */ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); - if (alloc_flags & ALLOC_MIN_RESERVE) - min -= min / 2; - - if (unlikely(alloc_harder)) { + if (unlikely(alloc_flags & ALLOC_RESERVES)) { /* - * OOM victims can try even harder than normal ALLOC_HARDER + * __GFP_HIGH allows access to 50% of the min reserve as well + * as OOM. + */ + if (alloc_flags & ALLOC_MIN_RESERVE) + min -= min / 2; + + /* + * Non-blocking allocations can access some of the reserve + * with more access if also __GFP_HIGH. The reasoning is that + * a non-blocking caller may incur a more severe penalty + * if it cannot get memory quickly, particularly if it's + * also __GFP_HIGH. + */ + if (alloc_flags & ALLOC_HARDER) + min -= min / 4; + + /* + * OOM victims can try even harder than the normal reserve * users on the grounds that it's definitely going to be in * the exit path shortly and free memory. Any allocation it * makes during the free path will be small and short-lived. */ if (alloc_flags & ALLOC_OOM) min -= min / 2; - else - min -= min / 4; } /* From bb414b7f4194bd3deac120693a7f4ab74cef9bc0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 13 Jan 2023 11:12:16 +0000 Subject: [PATCH 002/174] mm/page_alloc: explicitly define how __GFP_HIGH non-blocking allocations accesses reserves [ Upstream commit 1ebbb21811b76c3b932959787f37985af36f62fa ] GFP_ATOMIC allocations get flagged ALLOC_HARDER which is a vague description. In preparation for the removal of GFP_ATOMIC redefine __GFP_ATOMIC to simply mean non-blocking and renaming ALLOC_HARDER to ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to reserves but non-blocking is granted more access. For example, GFP_NOWAIT is non-blocking but has no special access to reserves. A __GFP_NOFAIL blocking allocation is granted access similar to __GFP_HIGH if the only alternative is an OOM kill. Link: https://lkml.kernel.org/r/20230113111217.14134-6-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: NeilBrown Cc: Thierry Reding Cc: Vlastimil Babka Signed-off-by: Andrew Morton Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") Signed-off-by: Sasha Levin --- mm/internal.h | 7 +++++-- mm/page_alloc.c | 44 ++++++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index cd095ce2f199..a50bc08337d2 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -754,7 +754,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_OOM ALLOC_NO_WATERMARKS #endif -#define ALLOC_HARDER 0x10 /* try to alloc harder */ +#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access + * to 25% of the min watermark or + * 62.5% if __GFP_HIGH is set. + */ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% * of the min watermark. */ @@ -769,7 +772,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ /* Flags that allow allocations below the min watermark. */ -#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) +#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) enum ttu_flags; struct tlbflush_unmap_batch; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6ab53e47ccea..49dc4ba88c27 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3996,18 +3996,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, * __GFP_HIGH allows access to 50% of the min reserve as well * as OOM. */ - if (alloc_flags & ALLOC_MIN_RESERVE) + if (alloc_flags & ALLOC_MIN_RESERVE) { min -= min / 2; - /* - * Non-blocking allocations can access some of the reserve - * with more access if also __GFP_HIGH. The reasoning is that - * a non-blocking caller may incur a more severe penalty - * if it cannot get memory quickly, particularly if it's - * also __GFP_HIGH. - */ - if (alloc_flags & ALLOC_HARDER) - min -= min / 4; + /* + * Non-blocking allocations (e.g. GFP_ATOMIC) can + * access more reserves than just __GFP_HIGH. Other + * non-blocking allocations requests such as GFP_NOWAIT + * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get + * access to the min reserve. + */ + if (alloc_flags & ALLOC_NON_BLOCK) + min -= min / 4; + } /* * OOM victims can try even harder than the normal reserve @@ -4858,28 +4859,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) * The caller may dip into page reserves a bit more if the caller * cannot run direct reclaim, or if the caller has realtime scheduling * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will - * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH). + * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH). */ alloc_flags |= (__force int) (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)); - if (gfp_mask & __GFP_ATOMIC) { + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { /* * Not worth trying to allocate harder for __GFP_NOMEMALLOC even * if it can't schedule. */ if (!(gfp_mask & __GFP_NOMEMALLOC)) { - alloc_flags |= ALLOC_HARDER; + alloc_flags |= ALLOC_NON_BLOCK; if (order > 0) alloc_flags |= ALLOC_HIGHATOMIC; } /* - * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the - * comment for __cpuset_node_allowed(). + * Ignore cpuset mems for non-blocking __GFP_HIGH (probably + * GFP_ATOMIC) rather than fail, see the comment for + * __cpuset_node_allowed(). */ - alloc_flags &= ~ALLOC_CPUSET; + if (alloc_flags & ALLOC_MIN_RESERVE) + alloc_flags &= ~ALLOC_CPUSET; } else if (unlikely(rt_task(current)) && in_task()) alloc_flags |= ALLOC_MIN_RESERVE; @@ -5312,12 +5315,13 @@ nopage: WARN_ON_ONCE_GFP(costly_order, gfp_mask); /* - * Help non-failing allocations by giving them access to memory - * reserves but do not use ALLOC_NO_WATERMARKS because this + * Help non-failing allocations by giving some access to memory + * reserves normally used for high priority non-blocking + * allocations but do not use ALLOC_NO_WATERMARKS because this * could deplete whole memory reserves which would just make - * the situation worse + * the situation worse. */ - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac); if (page) goto got_pg; From 189b954469cf82f8b8cf496f8de94b006d2d4746 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 11 Oct 2024 13:07:37 +0100 Subject: [PATCH 003/174] mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves [ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ] Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to fail even though free pages are available in the highatomic reserves. GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock() since it's only run from reclaim. Given that such allocations will pass the watermarks in __zone_watermark_unusable_free(), it makes sense to fallback to highatomic reserves the same way that ALLOC_OOM can. This fixes order-0 page allocation failures observed on Cloudflare's fleet when handling network packets: kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC), nodemask=(null),cpuset=/,mems_allowed=0-7 CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G O 6.6.43-CUSTOM #1 Hardware name: MACHINE Call Trace: dump_stack_lvl+0x3c/0x50 warn_alloc+0x13a/0x1c0 __alloc_pages_slowpath.constprop.0+0xc9d/0xd10 __alloc_pages+0x327/0x340 __napi_alloc_skb+0x16d/0x1f0 bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en] bnxt_rx_pkt+0x201/0x15e0 [bnxt_en] __bnxt_poll_work+0x156/0x2b0 [bnxt_en] bnxt_poll+0xd9/0x1c0 [bnxt_en] __napi_poll+0x2b/0x1b0 bpf_trampoline_6442524138+0x7d/0x1000 __napi_poll+0x5/0x1b0 net_rx_action+0x342/0x740 handle_softirqs+0xcf/0x2b0 irq_exit_rcu+0x6c/0x90 sysvec_apic_timer_interrupt+0x72/0x90 [mfleming@cloudflare.com: update comment] Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/ Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs") Signed-off-by: Matt Fleming Suggested-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Cc: Mel Gorman Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/page_alloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 49dc4ba88c27..b87b350b2f40 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3719,12 +3719,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, page = __rmqueue(zone, order, migratetype, alloc_flags); /* - * If the allocation fails, allow OOM handling access - * to HIGHATOMIC reserves as failing now is worse than - * failing a high-order atomic allocation in the - * future. + * If the allocation fails, allow OOM handling and + * order-0 (atomic) allocs access to HIGHATOMIC + * reserves as failing now is worse than failing a + * high-order atomic allocation in the future. */ - if (!page && (alloc_flags & ALLOC_OOM)) + if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK))) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { From 2fe5d62e122b040ce7fc4d31aa7fa96ae328cefc Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Wed, 16 Oct 2024 19:43:47 +0800 Subject: [PATCH 004/174] ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow [ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ] Syzbot reported a kernel BUG in ocfs2_truncate_inline. There are two reasons for this: first, the parameter value passed is greater than ocfs2_max_inline_data_with_xattr, second, the start and end parameters of ocfs2_truncate_inline are "unsigned int". So, we need to add a sanity check for byte_start and byte_len right before ocfs2_truncate_inline() in ocfs2_remove_inode_range(), if they are greater than ocfs2_max_inline_data_with_xattr return -EINVAL. Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com Fixes: 1afc32b95233 ("ocfs2: Write support for inline data") Signed-off-by: Edward Adam Davis Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7 Reviewed-by: Joseph Qi Cc: Joel Becker Cc: Joseph Qi Cc: Mark Fasheh Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/ocfs2/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f502bb2ce2ea..ea7c79e8ce42 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1784,6 +1784,14 @@ int ocfs2_remove_inode_range(struct inode *inode, return 0; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di); + + if (byte_start > id_count || byte_start + byte_len > id_count) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + ret = ocfs2_truncate_inline(inode, di_bh, byte_start, byte_start + byte_len, 0); if (ret) { From 4707893315802a0917231b94cb20cbe50ccbfe03 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 22 Oct 2024 18:25:14 +0800 Subject: [PATCH 005/174] mctp i2c: handle NULL header address [ Upstream commit 01e215975fd80af81b5b79f009d49ddd35976c13 ] daddr can be NULL if there is no neighbour table entry present, in that case the tx packet should be dropped. saddr will usually be set by MCTP core, but check for NULL in case a packet is transmitted by a different protocol. Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Cc: stable@vger.kernel.org Reported-by: Dung Cao Signed-off-by: Matt Johnston Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241022-mctp-i2c-null-dest-v3-1-e929709956c5@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/mctp/mctp-i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 1d67a3ca1fd1..7635a8b3c35c 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -547,6 +547,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; + if (!daddr || !saddr) + return -EINVAL; + lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); From 618d1939243f535bad32dffd7488d44e28e159ef Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Tue, 29 Oct 2024 16:16:53 +0100 Subject: [PATCH 006/174] ALSA: hda/realtek: Fix headset mic on TUXEDO Stellaris 16 Gen6 mb1 [ Upstream commit e49370d769e71456db3fbd982e95bab8c69f73e8 ] Quirk is needed to enable headset microphone on missing pin 0x19. Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: Link: https://patch.msgid.link/20241029151653.80726-2-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3cbd9cf80be9..d750c6e6eb98 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10214,6 +10214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), From c94e965f766321641ec38e4eece9ce8884543244 Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Mon, 16 Sep 2024 22:41:37 +0500 Subject: [PATCH 007/174] nvmet-auth: assign dh_key to NULL after kfree_sensitive [ Upstream commit d2f551b1f72b4c508ab9298419f6feadc3b5d791 ] ctrl->dh_key might be used across multiple calls to nvmet_setup_dhgroup() for the same controller. So it's better to nullify it after release on error path in order to avoid double free later in nvmet_destroy_auth(). Found by Linux Verification Center (linuxtesting.org) with Svace. Fixes: 7a277c37d352 ("nvmet-auth: Diffie-Hellman key exchange support") Cc: stable@vger.kernel.org Signed-off-by: Vitaliy Shevtsov Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/auth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index aacc05ec00c2..74791078fdeb 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -101,6 +101,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) pr_debug("%s: ctrl %d failed to generate private key, err %d\n", __func__, ctrl->cntlid, ret); kfree_sensitive(ctrl->dh_key); + ctrl->dh_key = NULL; return ret; } ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm); From 8f6a0b1f41deaeb72643554d94f7f4535e1a4477 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Oct 2024 18:07:06 +0200 Subject: [PATCH 008/174] kasan: remove vmalloc_percpu test [ Upstream commit 330d8df81f3673d6fb74550bbc9bb159d81b35f7 ] Commit 1a2473f0cbc0 ("kasan: improve vmalloc tests") added the vmalloc_percpu KASAN test with the assumption that __alloc_percpu always uses vmalloc internally, which is tagged by KASAN. However, __alloc_percpu might allocate memory from the first per-CPU chunk, which is not allocated via vmalloc(). As a result, the test might fail. Remove the test until proper KASAN annotation for the per-CPU allocated are added; tracked in https://bugzilla.kernel.org/show_bug.cgi?id=215019. Link: https://lkml.kernel.org/r/20241022160706.38943-1-andrey.konovalov@linux.dev Fixes: 1a2473f0cbc0 ("kasan: improve vmalloc tests") Signed-off-by: Andrey Konovalov Reported-by: Samuel Holland Link: https://lore.kernel.org/all/4a245fff-cc46-44d1-a5f9-fd2f1c3764ae@sifive.com/ Reported-by: Sabyrzhan Tasbolatov Link: https://lore.kernel.org/all/CACzwLxiWzNqPBp4C1VkaXZ2wDwvY3yZeetCi1TLGFipKW77drA@mail.gmail.com/ Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Marco Elver Cc: Sabyrzhan Tasbolatov Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/kasan/kasan_test.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index cef683a2e0d2..df9658299a08 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1260,32 +1260,6 @@ static void vm_map_ram_tags(struct kunit *test) free_pages((unsigned long)p_ptr, 1); } -static void vmalloc_percpu(struct kunit *test) -{ - char __percpu *ptr; - int cpu; - - /* - * This test is specifically crafted for the software tag-based mode, - * the only tag-based mode that poisons percpu mappings. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); - - ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); - - for_each_possible_cpu(cpu) { - char *c_ptr = per_cpu_ptr(ptr, cpu); - - KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure that in-bounds accesses don't crash the kernel. */ - *c_ptr = 0; - } - - free_percpu(ptr); -} - /* * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based @@ -1439,7 +1413,6 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(vmalloc_oob), KUNIT_CASE(vmap_tags), KUNIT_CASE(vm_map_ram_tags), - KUNIT_CASE(vmalloc_percpu), KUNIT_CASE(match_all_not_assigned), KUNIT_CASE(match_all_ptr_tag), KUNIT_CASE(match_all_mem_tag), From 7468bd2c6e0a0e86e938ed691188457732edbb7e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 13 Jan 2023 11:12:14 +0000 Subject: [PATCH 009/174] mm/page_alloc: explicitly record high-order atomic allocations in alloc_flags [ Upstream commit eb2e2b425c6984ca8034448a3f2c680622bd3d4d ] A high-order ALLOC_HARDER allocation is assumed to be atomic. While that is accurate, it changes later in the series. In preparation, explicitly record high-order atomic allocations in gfp_to_alloc_flags(). Link: https://lkml.kernel.org/r/20230113111217.14134-4-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: NeilBrown Cc: Thierry Reding Signed-off-by: Andrew Morton Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") Signed-off-by: Sasha Levin --- mm/internal.h | 1 + mm/page_alloc.c | 29 +++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 1be79a514754..f0f6198462cc 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -765,6 +765,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #else #define ALLOC_NOFRAGMENT 0x0 #endif +#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ enum ttu_flags; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e78ab23eb174..8e1f4d779b26 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3713,10 +3713,20 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, * reserved for high-order atomic allocation, so order-0 * request should skip it. */ - if (order > 0 && alloc_flags & ALLOC_HARDER) + if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { page = __rmqueue(zone, order, migratetype, alloc_flags); + + /* + * If the allocation fails, allow OOM handling access + * to HIGHATOMIC reserves as failing now is worse than + * failing a high-order atomic allocation in the + * future. + */ + if (!page && (alloc_flags & ALLOC_OOM)) + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + if (!page) { spin_unlock_irqrestore(&zone->lock, flags); return NULL; @@ -4030,8 +4040,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif - if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC)) + if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) && + !free_area_empty(area, MIGRATE_HIGHATOMIC)) { return true; + } } return false; } @@ -4293,7 +4305,7 @@ try_this_zone: * If this is a high-order atomic allocation then check * if the pageblock should be reserved for the future */ - if (unlikely(order && (alloc_flags & ALLOC_HARDER))) + if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) reserve_highatomic_pageblock(page, zone, order); return page; @@ -4820,7 +4832,7 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask, } static inline unsigned int -gfp_to_alloc_flags(gfp_t gfp_mask) +gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) { unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; @@ -4846,8 +4858,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * Not worth trying to allocate harder for __GFP_NOMEMALLOC even * if it can't schedule. */ - if (!(gfp_mask & __GFP_NOMEMALLOC)) + if (!(gfp_mask & __GFP_NOMEMALLOC)) { alloc_flags |= ALLOC_HARDER; + + if (order > 0) + alloc_flags |= ALLOC_HIGHATOMIC; + } + /* * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the * comment for __cpuset_node_allowed(). @@ -5056,7 +5073,7 @@ restart: * kswapd needs to be woken up, and to avoid the cost of setting up * alloc_flags precisely. So we do that now. */ - alloc_flags = gfp_to_alloc_flags(gfp_mask); + alloc_flags = gfp_to_alloc_flags(gfp_mask, order); /* * We need to recalculate the starting point for the zonelist iterator From 45676b8299d1d2362f9b36bfa986ed00da96db01 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 17 Aug 2023 17:13:31 +0300 Subject: [PATCH 010/174] io_uring: rename kiocb_end_write() local helper [ Upstream commit a370167fe526123637965f60859a9f1f3e1a58b7 ] This helper does not take a kiocb as input and we want to create a common helper by that name that takes a kiocb as input. Signed-off-by: Amir Goldstein Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Message-Id: <20230817141337.1025891-2-amir73il@gmail.com> Signed-off-by: Christian Brauner Stable-dep-of: 1d60d74e8526 ("io_uring/rw: fix missing NOWAIT check for O_DIRECT start write") Signed-off-by: Sasha Levin --- io_uring/rw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 038e6b13a749..4eb42fc29c15 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -220,7 +220,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req) } #endif -static void kiocb_end_write(struct io_kiocb *req) +static void io_req_end_write(struct io_kiocb *req) { /* * Tell lockdep we inherited freeze protection from submission @@ -243,7 +243,7 @@ static void io_req_io_end(struct io_kiocb *req) struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); if (rw->kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); + io_req_end_write(req); fsnotify_modify(req->file); } else { fsnotify_access(req->file); @@ -307,7 +307,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) struct io_kiocb *req = cmd_to_io_kiocb(rw); if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); if (unlikely(res != req->cqe.res)) { if (res == -EAGAIN && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; @@ -956,7 +956,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) io->bytes_done += ret2; if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); return ret ? ret : -EAGAIN; } done: @@ -967,7 +967,7 @@ copy_iov: ret = io_setup_async_rw(req, iovec, s, false); if (!ret) { if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); return -EAGAIN; } return ret; From 6d42982ad0b6e08dafe73b4a08c0ef8ee1b3a130 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 17 Aug 2023 17:13:33 +0300 Subject: [PATCH 011/174] fs: create kiocb_{start,end}_write() helpers [ Upstream commit ed0360bbab72b829437b67ebb2f9cfac19f59dfe ] aio, io_uring, cachefiles and overlayfs, all open code an ugly variant of file_{start,end}_write() to silence lockdep warnings. Create helpers for this lockdep dance so we can use the helpers in all the callers. Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Message-Id: <20230817141337.1025891-4-amir73il@gmail.com> Signed-off-by: Christian Brauner Stable-dep-of: 1d60d74e8526 ("io_uring/rw: fix missing NOWAIT check for O_DIRECT start write") Signed-off-by: Sasha Levin --- include/linux/fs.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 33c496130983..0d32634c5cf0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3029,6 +3029,42 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +/** + * kiocb_start_write - get write access to a superblock for async file io + * @iocb: the io context we want to submit the write with + * + * This is a variant of sb_start_write() for async io submission. + * Should be matched with a call to kiocb_end_write(). + */ +static inline void kiocb_start_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + sb_start_write(inode->i_sb); + /* + * Fool lockdep by telling it the lock got released so that it + * doesn't complain about the held lock when we return to userspace. + */ + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); +} + +/** + * kiocb_end_write - drop write access to a superblock after async file io + * @iocb: the io context we sumbitted the write with + * + * Should be matched with a call to kiocb_start_write(). + */ +static inline void kiocb_end_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + /* + * Tell lockdep we inherited freeze protection from submission thread. + */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + sb_end_write(inode->i_sb); +} + /* * This is used for regular files where some users -- especially the * currently executed binary in a process, previously handled via From 0ed78d3a299edb8188bdd136b75d7ba7a12e1297 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 17 Aug 2023 17:13:34 +0300 Subject: [PATCH 012/174] io_uring: use kiocb_{start,end}_write() helpers [ Upstream commit e484fd73f4bdcb00c2188100c2d84e9f3f5c9f7d ] Use helpers instead of the open coded dance to silence lockdep warnings. Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Message-Id: <20230817141337.1025891-5-amir73il@gmail.com> Signed-off-by: Christian Brauner Stable-dep-of: 1d60d74e8526 ("io_uring/rw: fix missing NOWAIT check for O_DIRECT start write") Signed-off-by: Sasha Levin --- io_uring/rw.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 4eb42fc29c15..c15c7873813b 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -222,15 +222,10 @@ static bool io_rw_should_reissue(struct io_kiocb *req) static void io_req_end_write(struct io_kiocb *req) { - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ if (req->flags & REQ_F_ISREG) { - struct super_block *sb = file_inode(req->file)->i_sb; + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); - __sb_writers_acquired(sb, SB_FREEZE_WRITE); - sb_end_write(sb); + kiocb_end_write(&rw->kiocb); } } @@ -897,18 +892,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) return ret; } - /* - * Open-code file_start_write here to grab freeze protection, - * which will be released by another thread in - * io_complete_rw(). Fool lockdep by telling it the lock got - * released so that it doesn't complain about the held lock when - * we return to userspace. - */ - if (req->flags & REQ_F_ISREG) { - sb_start_write(file_inode(req->file)->i_sb); - __sb_writers_release(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE); - } + if (req->flags & REQ_F_ISREG) + kiocb_start_write(kiocb); kiocb->ki_flags |= IOCB_WRITE; if (likely(req->file->f_op->write_iter)) From 9e8debb8e51354b201db494689198078ec2c1e75 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 31 Oct 2024 08:05:44 -0600 Subject: [PATCH 013/174] io_uring/rw: fix missing NOWAIT check for O_DIRECT start write [ Upstream commit 1d60d74e852647255bd8e76f5a22dc42531e4389 ] When io_uring starts a write, it'll call kiocb_start_write() to bump the super block rwsem, preventing any freezes from happening while that write is in-flight. The freeze side will grab that rwsem for writing, excluding any new writers from happening and waiting for existing writes to finish. But io_uring unconditionally uses kiocb_start_write(), which will block if someone is currently attempting to freeze the mount point. This causes a deadlock where freeze is waiting for previous writes to complete, but the previous writes cannot complete, as the task that is supposed to complete them is blocked waiting on starting a new write. This results in the following stuck trace showing that dependency with the write blocked starting a new write: task:fio state:D stack:0 pid:886 tgid:886 ppid:876 Call trace: __switch_to+0x1d8/0x348 __schedule+0x8e8/0x2248 schedule+0x110/0x3f0 percpu_rwsem_wait+0x1e8/0x3f8 __percpu_down_read+0xe8/0x500 io_write+0xbb8/0xff8 io_issue_sqe+0x10c/0x1020 io_submit_sqes+0x614/0x2110 __arm64_sys_io_uring_enter+0x524/0x1038 invoke_syscall+0x74/0x268 el0_svc_common.constprop.0+0x160/0x238 do_el0_svc+0x44/0x60 el0_svc+0x44/0xb0 el0t_64_sync_handler+0x118/0x128 el0t_64_sync+0x168/0x170 INFO: task fsfreeze:7364 blocked for more than 15 seconds. Not tainted 6.12.0-rc5-00063-g76aaf945701c #7963 with the attempting freezer stuck trying to grab the rwsem: task:fsfreeze state:D stack:0 pid:7364 tgid:7364 ppid:995 Call trace: __switch_to+0x1d8/0x348 __schedule+0x8e8/0x2248 schedule+0x110/0x3f0 percpu_down_write+0x2b0/0x680 freeze_super+0x248/0x8a8 do_vfs_ioctl+0x149c/0x1b18 __arm64_sys_ioctl+0xd0/0x1a0 invoke_syscall+0x74/0x268 el0_svc_common.constprop.0+0x160/0x238 do_el0_svc+0x44/0x60 el0_svc+0x44/0xb0 el0t_64_sync_handler+0x118/0x128 el0t_64_sync+0x168/0x170 Fix this by having the io_uring side honor IOCB_NOWAIT, and only attempt a blocking grab of the super block rwsem if it isn't set. For normal issue where IOCB_NOWAIT would always be set, this returns -EAGAIN which will have io_uring core issue a blocking attempt of the write. That will in turn also get completions run, ensuring forward progress. Since freezing requires CAP_SYS_ADMIN in the first place, this isn't something that can be triggered by a regular user. Cc: stable@vger.kernel.org # 5.10+ Reported-by: Peter Mann Link: https://lore.kernel.org/io-uring/38c94aec-81c9-4f62-b44e-1d87f5597644@sh.cz Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/rw.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index c15c7873813b..9d6e17a244ae 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -839,6 +839,25 @@ done: return kiocb_done(req, ret, issue_flags); } +static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb) +{ + struct inode *inode; + bool ret; + + if (!(req->flags & REQ_F_ISREG)) + return true; + if (!(kiocb->ki_flags & IOCB_NOWAIT)) { + kiocb_start_write(kiocb); + return true; + } + + inode = file_inode(kiocb->ki_filp); + ret = sb_start_write_trylock(inode->i_sb); + if (ret) + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + return ret; +} + int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); @@ -892,8 +911,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) return ret; } - if (req->flags & REQ_F_ISREG) - kiocb_start_write(kiocb); + if (unlikely(!io_kiocb_start_write(req, kiocb))) + return -EAGAIN; kiocb->ki_flags |= IOCB_WRITE; if (likely(req->file->f_op->write_iter)) From 01a0c928832cf979205da9cc58d2e7d2fb9c65ae Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 24 Oct 2022 16:34:22 +0800 Subject: [PATCH 014/174] mm: migrate: try again if THP split is failed due to page refcnt [ Upstream commit fd4a7ac32918d3d7a2d17dc06c5520f45e36eb52 ] When creating a virtual machine, we will use memfd_create() to get a file descriptor which can be used to create share memory mappings using the mmap function, meanwhile the mmap() will set the MAP_POPULATE flag to allocate physical pages for the virtual machine. When allocating physical pages for the guest, the host can fallback to allocate some CMA pages for the guest when over half of the zone's free memory is in the CMA area. In guest os, when the application wants to do some data transaction with DMA, our QEMU will call VFIO_IOMMU_MAP_DMA ioctl to do longterm-pin and create IOMMU mappings for the DMA pages. However, when calling VFIO_IOMMU_MAP_DMA ioctl to pin the physical pages, we found it will be failed to longterm-pin sometimes. After some invetigation, we found the pages used to do DMA mapping can contain some CMA pages, and these CMA pages will cause a possible failure of the longterm-pin, due to failed to migrate the CMA pages. The reason of migration failure may be temporary reference count or memory allocation failure. So that will cause the VFIO_IOMMU_MAP_DMA ioctl returns error, which makes the application failed to start. I observed one migration failure case (which is not easy to reproduce) is that, the 'thp_migration_fail' count is 1 and the 'thp_split_page_failed' count is also 1. That means when migrating a THP which is in CMA area, but can not allocate a new THP due to memory fragmentation, so it will split the THP. However THP split is also failed, probably the reason is temporary reference count of this THP. And the temporary reference count can be caused by dropping page caches (I observed the drop caches operation in the system), but we can not drop the shmem page caches due to they are already dirty at that time. Especially for THP split failure, which is caused by temporary reference count, we can try again to mitigate the failure of migration in this case according to previous discussion [1]. [1] https://lore.kernel.org/all/470dc638-a300-f261-94b4-e27250e42f96@redhat.com/ Link: https://lkml.kernel.org/r/6784730480a1df82e8f4cba1ed088e4ac767994b.1666599848.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: "Huang, Ying" Cc: Alistair Popple Cc: David Hildenbrand Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/huge_memory.c | 4 ++-- mm/migrate.c | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 98a1a05f2db2..f53bc54dacb3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2728,7 +2728,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) * split PMDs */ if (!can_split_folio(folio, &extra_pins)) { - ret = -EBUSY; + ret = -EAGAIN; goto out_unlock; } @@ -2780,7 +2780,7 @@ fail: xas_unlock(&xas); local_irq_enable(); remap_page(folio, folio_nr_pages(folio)); - ret = -EBUSY; + ret = -EAGAIN; } out_unlock: diff --git a/mm/migrate.c b/mm/migrate.c index 0252aa4ff572..b0caa89e67d5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1518,9 +1518,22 @@ thp_subpage_migration: if (is_thp) { nr_thp_failed++; /* THP NUMA faulting doesn't split THP to retry. */ - if (!nosplit && !try_split_thp(page, &thp_split_pages)) { - nr_thp_split++; - break; + if (!nosplit) { + int ret = try_split_thp(page, &thp_split_pages); + + if (!ret) { + nr_thp_split++; + break; + } else if (reason == MR_LONGTERM_PIN && + ret == -EAGAIN) { + /* + * Try again to split THP to mitigate + * the failure of longterm pinning. + */ + thp_retry++; + nr_retry_pages += nr_subpages; + break; + } } } else if (!no_subpage_counting) { nr_failed++; From b0030b869954aba34be2e7d0f5de86564cf1c418 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 9 Nov 2022 09:23:47 +0800 Subject: [PATCH 015/174] migrate: convert unmap_and_move() to use folios [ Upstream commit 49f51859221a3dfee27488eaeaff800459cac6a9 ] Patch series "migrate: convert migrate_pages()/unmap_and_move() to use folios", v2. The conversion is quite straightforward, just replace the page API to the corresponding folio API. migrate_pages() and unmap_and_move() mostly work with folios (head pages) only. This patch (of 2): Quite straightforward, the page functions are converted to corresponding folio functions. Same for comments. Link: https://lkml.kernel.org/r/20221109012348.93849-1-ying.huang@intel.com Link: https://lkml.kernel.org/r/20221109012348.93849-2-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Zi Yan Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Baolin Wang Cc: Oscar Salvador Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/migrate.c | 54 ++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index b0caa89e67d5..16b456b927c1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1162,79 +1162,79 @@ out: } /* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. + * Obtain the lock on folio, remove all ptes and migrate the folio + * to the newly allocated folio in dst. */ static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, - unsigned long private, struct page *page, + unsigned long private, struct folio *src, int force, enum migrate_mode mode, enum migrate_reason reason, struct list_head *ret) { - struct folio *dst, *src = page_folio(page); + struct folio *dst; int rc = MIGRATEPAGE_SUCCESS; struct page *newpage = NULL; - if (!thp_migration_supported() && PageTransHuge(page)) + if (!thp_migration_supported() && folio_test_transhuge(src)) return -ENOSYS; - if (page_count(page) == 1) { - /* Page was freed from under us. So we are done. */ - ClearPageActive(page); - ClearPageUnevictable(page); + if (folio_ref_count(src) == 1) { + /* Folio was freed from under us. So we are done. */ + folio_clear_active(src); + folio_clear_unevictable(src); /* free_pages_prepare() will clear PG_isolated. */ goto out; } - newpage = get_new_page(page, private); + newpage = get_new_page(&src->page, private); if (!newpage) return -ENOMEM; dst = page_folio(newpage); - newpage->private = 0; + dst->private = 0; rc = __unmap_and_move(src, dst, force, mode); if (rc == MIGRATEPAGE_SUCCESS) - set_page_owner_migrate_reason(newpage, reason); + set_page_owner_migrate_reason(&dst->page, reason); out: if (rc != -EAGAIN) { /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been + * A folio that has been migrated has all references + * removed and will be freed. A folio that has not been * migrated will have kept its references and be restored. */ - list_del(&page->lru); + list_del(&src->lru); } /* * If migration is successful, releases reference grabbed during - * isolation. Otherwise, restore the page to right list unless + * isolation. Otherwise, restore the folio to right list unless * we want to retry. */ if (rc == MIGRATEPAGE_SUCCESS) { /* - * Compaction can migrate also non-LRU pages which are + * Compaction can migrate also non-LRU folios which are * not accounted to NR_ISOLATED_*. They can be recognized - * as __PageMovable + * as __folio_test_movable */ - if (likely(!__PageMovable(page))) - mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -thp_nr_pages(page)); + if (likely(!__folio_test_movable(src))) + mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + + folio_is_file_lru(src), -folio_nr_pages(src)); if (reason != MR_MEMORY_FAILURE) /* - * We release the page in page_handle_poison. + * We release the folio in page_handle_poison. */ - put_page(page); + folio_put(src); } else { if (rc != -EAGAIN) - list_add_tail(&page->lru, ret); + list_add_tail(&src->lru, ret); if (put_new_page) - put_new_page(newpage, private); + put_new_page(&dst->page, private); else - put_page(newpage); + folio_put(dst); } return rc; @@ -1471,7 +1471,7 @@ thp_subpage_migration: &ret_pages); else rc = unmap_and_move(get_new_page, put_new_page, - private, page, pass > 2, mode, + private, page_folio(page), pass > 2, mode, reason, &ret_pages); /* * The rules are: From 2a4b092d91ac8a51f23845c6dae684a757571891 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 9 Nov 2022 09:23:48 +0800 Subject: [PATCH 016/174] migrate: convert migrate_pages() to use folios [ Upstream commit eaec4e639f11413ce75fbf38affd1aa5c40979e9 ] Quite straightforward, the page functions are converted to corresponding folio functions. Same for comments. THP specific code are converted to be large folio. Link: https://lkml.kernel.org/r/20221109012348.93849-3-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/migrate.c | 210 +++++++++++++++++++++++++++------------------------ 1 file changed, 112 insertions(+), 98 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 16b456b927c1..562f819dc618 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1385,231 +1385,245 @@ out: return rc; } -static inline int try_split_thp(struct page *page, struct list_head *split_pages) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) { int rc; - lock_page(page); - rc = split_huge_page_to_list(page, split_pages); - unlock_page(page); + folio_lock(folio); + rc = split_folio_to_list(folio, split_folios); + folio_unlock(folio); if (!rc) - list_move_tail(&page->lru, split_pages); + list_move_tail(&folio->lru, split_folios); return rc; } /* - * migrate_pages - migrate the pages specified in a list, to the free pages + * migrate_pages - migrate the folios specified in a list, to the free folios * supplied as the target for the page migration * - * @from: The list of pages to be migrated. - * @get_new_page: The function used to allocate free pages to be used - * as the target of the page migration. - * @put_new_page: The function used to free target pages if migration + * @from: The list of folios to be migrated. + * @get_new_page: The function used to allocate free folios to be used + * as the target of the folio migration. + * @put_new_page: The function used to free target folios if migration * fails, or NULL if no special handling is necessary. * @private: Private data to be passed on to get_new_page() * @mode: The migration mode that specifies the constraints for - * page migration, if any. - * @reason: The reason for page migration. - * @ret_succeeded: Set to the number of normal pages migrated successfully if + * folio migration, if any. + * @reason: The reason for folio migration. + * @ret_succeeded: Set to the number of folios migrated successfully if * the caller passes a non-NULL pointer. * - * The function returns after 10 attempts or if no pages are movable any more - * because the list has become empty or no retryable pages exist any more. - * It is caller's responsibility to call putback_movable_pages() to return pages + * The function returns after 10 attempts or if no folios are movable any more + * because the list has become empty or no retryable folios exist any more. + * It is caller's responsibility to call putback_movable_pages() to return folios * to the LRU or free list only if ret != 0. * - * Returns the number of {normal page, THP, hugetlb} that were not migrated, or - * an error code. The number of THP splits will be considered as the number of - * non-migrated THP, no matter how many subpages of the THP are migrated successfully. + * Returns the number of {normal folio, large folio, hugetlb} that were not + * migrated, or an error code. The number of large folio splits will be + * considered as the number of non-migrated large folio, no matter how many + * split folios of the large folio are migrated successfully. */ int migrate_pages(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { int retry = 1; + int large_retry = 1; int thp_retry = 1; int nr_failed = 0; int nr_failed_pages = 0; int nr_retry_pages = 0; int nr_succeeded = 0; int nr_thp_succeeded = 0; + int nr_large_failed = 0; int nr_thp_failed = 0; int nr_thp_split = 0; int pass = 0; + bool is_large = false; bool is_thp = false; - struct page *page; - struct page *page2; - int rc, nr_subpages; - LIST_HEAD(ret_pages); - LIST_HEAD(thp_split_pages); + struct folio *folio, *folio2; + int rc, nr_pages; + LIST_HEAD(ret_folios); + LIST_HEAD(split_folios); bool nosplit = (reason == MR_NUMA_MISPLACED); - bool no_subpage_counting = false; + bool no_split_folio_counting = false; trace_mm_migrate_pages_start(mode, reason); -thp_subpage_migration: - for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { +split_folio_migration: + for (pass = 0; pass < 10 && (retry || large_retry); pass++) { retry = 0; + large_retry = 0; thp_retry = 0; nr_retry_pages = 0; - list_for_each_entry_safe(page, page2, from, lru) { + list_for_each_entry_safe(folio, folio2, from, lru) { /* - * THP statistics is based on the source huge page. - * Capture required information that might get lost - * during migration. + * Large folio statistics is based on the source large + * folio. Capture required information that might get + * lost during migration. */ - is_thp = PageTransHuge(page) && !PageHuge(page); - nr_subpages = compound_nr(page); + is_large = folio_test_large(folio) && !folio_test_hugetlb(folio); + is_thp = is_large && folio_test_pmd_mappable(folio); + nr_pages = folio_nr_pages(folio); cond_resched(); - if (PageHuge(page)) + if (folio_test_hugetlb(folio)) rc = unmap_and_move_huge_page(get_new_page, - put_new_page, private, page, - pass > 2, mode, reason, - &ret_pages); + put_new_page, private, + &folio->page, pass > 2, mode, + reason, + &ret_folios); else rc = unmap_and_move(get_new_page, put_new_page, - private, page_folio(page), pass > 2, mode, - reason, &ret_pages); + private, folio, pass > 2, mode, + reason, &ret_folios); /* * The rules are: - * Success: non hugetlb page will be freed, hugetlb - * page will be put back + * Success: non hugetlb folio will be freed, hugetlb + * folio will be put back * -EAGAIN: stay on the from list * -ENOMEM: stay on the from list * -ENOSYS: stay on the from list - * Other errno: put on ret_pages list then splice to + * Other errno: put on ret_folios list then splice to * from list */ switch(rc) { /* - * THP migration might be unsupported or the - * allocation could've failed so we should - * retry on the same page with the THP split - * to base pages. + * Large folio migration might be unsupported or + * the allocation could've failed so we should retry + * on the same folio with the large folio split + * to normal folios. * - * Sub-pages are put in thp_split_pages, and + * Split folios are put in split_folios, and * we will migrate them after the rest of the * list is processed. */ case -ENOSYS: - /* THP migration is unsupported */ - if (is_thp) { - nr_thp_failed++; - if (!try_split_thp(page, &thp_split_pages)) { - nr_thp_split++; + /* Large folio migration is unsupported */ + if (is_large) { + nr_large_failed++; + nr_thp_failed += is_thp; + if (!try_split_folio(folio, &split_folios)) { + nr_thp_split += is_thp; break; } /* Hugetlb migration is unsupported */ - } else if (!no_subpage_counting) { + } else if (!no_split_folio_counting) { nr_failed++; } - nr_failed_pages += nr_subpages; - list_move_tail(&page->lru, &ret_pages); + nr_failed_pages += nr_pages; + list_move_tail(&folio->lru, &ret_folios); break; case -ENOMEM: /* * When memory is low, don't bother to try to migrate - * other pages, just exit. + * other folios, just exit. */ - if (is_thp) { - nr_thp_failed++; - /* THP NUMA faulting doesn't split THP to retry. */ + if (is_large) { + nr_large_failed++; + nr_thp_failed += is_thp; + /* Large folio NUMA faulting doesn't split to retry. */ if (!nosplit) { - int ret = try_split_thp(page, &thp_split_pages); + int ret = try_split_folio(folio, &split_folios); if (!ret) { - nr_thp_split++; + nr_thp_split += is_thp; break; } else if (reason == MR_LONGTERM_PIN && ret == -EAGAIN) { /* - * Try again to split THP to mitigate - * the failure of longterm pinning. + * Try again to split large folio to + * mitigate the failure of longterm pinning. */ - thp_retry++; - nr_retry_pages += nr_subpages; + large_retry++; + thp_retry += is_thp; + nr_retry_pages += nr_pages; break; } } - } else if (!no_subpage_counting) { + } else if (!no_split_folio_counting) { nr_failed++; } - nr_failed_pages += nr_subpages + nr_retry_pages; + nr_failed_pages += nr_pages + nr_retry_pages; /* - * There might be some subpages of fail-to-migrate THPs - * left in thp_split_pages list. Move them back to migration + * There might be some split folios of fail-to-migrate large + * folios left in split_folios list. Move them back to migration * list so that they could be put back to the right list by - * the caller otherwise the page refcnt will be leaked. + * the caller otherwise the folio refcnt will be leaked. */ - list_splice_init(&thp_split_pages, from); + list_splice_init(&split_folios, from); /* nr_failed isn't updated for not used */ + nr_large_failed += large_retry; nr_thp_failed += thp_retry; goto out; case -EAGAIN: - if (is_thp) - thp_retry++; - else if (!no_subpage_counting) + if (is_large) { + large_retry++; + thp_retry += is_thp; + } else if (!no_split_folio_counting) { retry++; - nr_retry_pages += nr_subpages; + } + nr_retry_pages += nr_pages; break; case MIGRATEPAGE_SUCCESS: - nr_succeeded += nr_subpages; - if (is_thp) - nr_thp_succeeded++; + nr_succeeded += nr_pages; + nr_thp_succeeded += is_thp; break; default: /* * Permanent failure (-EBUSY, etc.): - * unlike -EAGAIN case, the failed page is - * removed from migration page list and not + * unlike -EAGAIN case, the failed folio is + * removed from migration folio list and not * retried in the next outer loop. */ - if (is_thp) - nr_thp_failed++; - else if (!no_subpage_counting) + if (is_large) { + nr_large_failed++; + nr_thp_failed += is_thp; + } else if (!no_split_folio_counting) { nr_failed++; + } - nr_failed_pages += nr_subpages; + nr_failed_pages += nr_pages; break; } } } nr_failed += retry; + nr_large_failed += large_retry; nr_thp_failed += thp_retry; nr_failed_pages += nr_retry_pages; /* - * Try to migrate subpages of fail-to-migrate THPs, no nr_failed - * counting in this round, since all subpages of a THP is counted - * as 1 failure in the first round. + * Try to migrate split folios of fail-to-migrate large folios, no + * nr_failed counting in this round, since all split folios of a + * large folio is counted as 1 failure in the first round. */ - if (!list_empty(&thp_split_pages)) { + if (!list_empty(&split_folios)) { /* - * Move non-migrated pages (after 10 retries) to ret_pages + * Move non-migrated folios (after 10 retries) to ret_folios * to avoid migrating them again. */ - list_splice_init(from, &ret_pages); - list_splice_init(&thp_split_pages, from); - no_subpage_counting = true; + list_splice_init(from, &ret_folios); + list_splice_init(&split_folios, from); + no_split_folio_counting = true; retry = 1; - goto thp_subpage_migration; + goto split_folio_migration; } - rc = nr_failed + nr_thp_failed; + rc = nr_failed + nr_large_failed; out: /* - * Put the permanent failure page back to migration list, they + * Put the permanent failure folio back to migration list, they * will be put back to the right list by the caller. */ - list_splice(&ret_pages, from); + list_splice(&ret_folios, from); /* - * Return 0 in case all subpages of fail-to-migrate THPs are - * migrated successfully. + * Return 0 in case all split folios of fail-to-migrate large folios + * are migrated successfully. */ if (list_empty(from)) rc = 0; From de0a1554a45428ad0e10340d642608ce62d61c74 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 16 Nov 2022 09:23:45 +0800 Subject: [PATCH 017/174] mm/migrate.c: stop using 0 as NULL pointer [ Upstream commit 4c74b65f478dc9353780a6be17fc82f1b06cea80 ] mm/migrate.c:1198:24: warning: Using plain integer as NULL pointer Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3080 Link: https://lkml.kernel.org/r/20221116012345.84870-1-yang.lee@linux.alibaba.com Signed-off-by: Yang Li Reported-by: Abaci Robot Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 562f819dc618..81444abf54db 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1192,7 +1192,7 @@ static int unmap_and_move(new_page_t get_new_page, return -ENOMEM; dst = page_folio(newpage); - dst->private = 0; + dst->private = NULL; rc = __unmap_and_move(src, dst, force, mode); if (rc == MIGRATEPAGE_SUCCESS) set_page_owner_migrate_reason(&dst->page, reason); From 6058d02a81a79cfd99a18f50d13df7494569286b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 13 Feb 2023 20:34:36 +0800 Subject: [PATCH 018/174] migrate_pages: organize stats with struct migrate_pages_stats [ Upstream commit 5b855937096aea7f81e73ad6d40d433c9dd49577 ] Patch series "migrate_pages(): batch TLB flushing", v5. Now, migrate_pages() migrates folios one by one, like the fake code as follows, for each folio unmap flush TLB copy restore map If multiple folios are passed to migrate_pages(), there are opportunities to batch the TLB flushing and copying. That is, we can change the code to something as follows, for each folio unmap for each folio flush TLB for each folio copy for each folio restore map The total number of TLB flushing IPI can be reduced considerably. And we may use some hardware accelerator such as DSA to accelerate the folio copying. So in this patch, we refactor the migrate_pages() implementation and implement the TLB flushing batching. Base on this, hardware accelerated folio copying can be implemented. If too many folios are passed to migrate_pages(), in the naive batched implementation, we may unmap too many folios at the same time. The possibility for a task to wait for the migrated folios to be mapped again increases. So the latency may be hurt. To deal with this issue, the max number of folios be unmapped in batch is restricted to no more than HPAGE_PMD_NR in the unit of page. That is, the influence is at the same level of THP migration. We use the following test to measure the performance impact of the patchset, On a 2-socket Intel server, - Run pmbench memory accessing benchmark - Run `migratepages` to migrate pages of pmbench between node 0 and node 1 back and forth. With the patch, the TLB flushing IPI reduces 99.1% during the test and the number of pages migrated successfully per second increases 291.7%. Xin Hao helped to test the patchset on an ARM64 server with 128 cores, 2 NUMA nodes. Test results show that the page migration performance increases up to 78%. This patch (of 9): Define struct migrate_pages_stats to organize the various statistics in migrate_pages(). This makes it easier to collect and consume the statistics in multiple functions. This will be needed in the following patches in the series. Link: https://lkml.kernel.org/r/20230213123444.155149-1-ying.huang@intel.com Link: https://lkml.kernel.org/r/20230213123444.155149-2-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Alistair Popple Reviewed-by: Zi Yan Reviewed-by: Baolin Wang Reviewed-by: Xin Hao Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Cc: Bharata B Rao Cc: Minchan Kim Cc: Mike Kravetz Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/migrate.c | 60 +++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 81444abf54db..b7596a0b4445 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1398,6 +1398,16 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f return rc; } +struct migrate_pages_stats { + int nr_succeeded; /* Normal and large folios migrated successfully, in + units of base pages */ + int nr_failed_pages; /* Normal and large folios failed to be migrated, in + units of base pages. Untried folios aren't counted */ + int nr_thp_succeeded; /* THP migrated successfully */ + int nr_thp_failed; /* THP failed to be migrated */ + int nr_thp_split; /* THP split before migrating */ +}; + /* * migrate_pages - migrate the folios specified in a list, to the free folios * supplied as the target for the page migration @@ -1432,13 +1442,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, int large_retry = 1; int thp_retry = 1; int nr_failed = 0; - int nr_failed_pages = 0; int nr_retry_pages = 0; - int nr_succeeded = 0; - int nr_thp_succeeded = 0; int nr_large_failed = 0; - int nr_thp_failed = 0; - int nr_thp_split = 0; int pass = 0; bool is_large = false; bool is_thp = false; @@ -1448,9 +1453,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, LIST_HEAD(split_folios); bool nosplit = (reason == MR_NUMA_MISPLACED); bool no_split_folio_counting = false; + struct migrate_pages_stats stats; trace_mm_migrate_pages_start(mode, reason); + memset(&stats, 0, sizeof(stats)); split_folio_migration: for (pass = 0; pass < 10 && (retry || large_retry); pass++) { retry = 0; @@ -1504,9 +1511,9 @@ split_folio_migration: /* Large folio migration is unsupported */ if (is_large) { nr_large_failed++; - nr_thp_failed += is_thp; + stats.nr_thp_failed += is_thp; if (!try_split_folio(folio, &split_folios)) { - nr_thp_split += is_thp; + stats.nr_thp_split += is_thp; break; } /* Hugetlb migration is unsupported */ @@ -1514,7 +1521,7 @@ split_folio_migration: nr_failed++; } - nr_failed_pages += nr_pages; + stats.nr_failed_pages += nr_pages; list_move_tail(&folio->lru, &ret_folios); break; case -ENOMEM: @@ -1524,13 +1531,13 @@ split_folio_migration: */ if (is_large) { nr_large_failed++; - nr_thp_failed += is_thp; + stats.nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (!nosplit) { int ret = try_split_folio(folio, &split_folios); if (!ret) { - nr_thp_split += is_thp; + stats.nr_thp_split += is_thp; break; } else if (reason == MR_LONGTERM_PIN && ret == -EAGAIN) { @@ -1548,7 +1555,7 @@ split_folio_migration: nr_failed++; } - nr_failed_pages += nr_pages + nr_retry_pages; + stats.nr_failed_pages += nr_pages + nr_retry_pages; /* * There might be some split folios of fail-to-migrate large * folios left in split_folios list. Move them back to migration @@ -1558,7 +1565,7 @@ split_folio_migration: list_splice_init(&split_folios, from); /* nr_failed isn't updated for not used */ nr_large_failed += large_retry; - nr_thp_failed += thp_retry; + stats.nr_thp_failed += thp_retry; goto out; case -EAGAIN: if (is_large) { @@ -1570,8 +1577,8 @@ split_folio_migration: nr_retry_pages += nr_pages; break; case MIGRATEPAGE_SUCCESS: - nr_succeeded += nr_pages; - nr_thp_succeeded += is_thp; + stats.nr_succeeded += nr_pages; + stats.nr_thp_succeeded += is_thp; break; default: /* @@ -1582,20 +1589,20 @@ split_folio_migration: */ if (is_large) { nr_large_failed++; - nr_thp_failed += is_thp; + stats.nr_thp_failed += is_thp; } else if (!no_split_folio_counting) { nr_failed++; } - nr_failed_pages += nr_pages; + stats.nr_failed_pages += nr_pages; break; } } } nr_failed += retry; nr_large_failed += large_retry; - nr_thp_failed += thp_retry; - nr_failed_pages += nr_retry_pages; + stats.nr_thp_failed += thp_retry; + stats.nr_failed_pages += nr_retry_pages; /* * Try to migrate split folios of fail-to-migrate large folios, no * nr_failed counting in this round, since all split folios of a @@ -1628,16 +1635,17 @@ out: if (list_empty(from)) rc = 0; - count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); - count_vm_events(PGMIGRATE_FAIL, nr_failed_pages); - count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded); - count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed); - count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split); - trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded, - nr_thp_failed, nr_thp_split, mode, reason); + count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded); + count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages); + count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded); + count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed); + count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split); + trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages, + stats.nr_thp_succeeded, stats.nr_thp_failed, + stats.nr_thp_split, mode, reason); if (ret_succeeded) - *ret_succeeded = nr_succeeded; + *ret_succeeded = stats.nr_succeeded; return rc; } From 1145493ce58492846ffbf06f7255784251c3e94a Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 13 Feb 2023 20:34:37 +0800 Subject: [PATCH 019/174] migrate_pages: separate hugetlb folios migration [ Upstream commit e5bfff8b10e496378da4b7863479dd6fb907d4ea ] This is a preparation patch to batch the folio unmapping and moving for the non-hugetlb folios. Based on that we can batch the TLB shootdown during the folio migration and make it possible to use some hardware accelerator for the folio copying. In this patch the hugetlb folios and non-hugetlb folios migration is separated in migrate_pages() to make it easy to change the non-hugetlb folios migration implementation. Link: https://lkml.kernel.org/r/20230213123444.155149-3-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Reviewed-by: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Cc: Bharata B Rao Cc: Alistair Popple Cc: Minchan Kim Cc: Mike Kravetz Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/migrate.c | 141 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 119 insertions(+), 22 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index b7596a0b4445..70d0b20d06a5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1398,6 +1398,8 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f return rc; } +#define NR_MAX_MIGRATE_PAGES_RETRY 10 + struct migrate_pages_stats { int nr_succeeded; /* Normal and large folios migrated successfully, in units of base pages */ @@ -1408,6 +1410,95 @@ struct migrate_pages_stats { int nr_thp_split; /* THP split before migrating */ }; +/* + * Returns the number of hugetlb folios that were not migrated, or an error code + * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable + * any more because the list has become empty or no retryable hugetlb folios + * exist any more. It is caller's responsibility to call putback_movable_pages() + * only if ret != 0. + */ +static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page, + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason, + struct migrate_pages_stats *stats, + struct list_head *ret_folios) +{ + int retry = 1; + int nr_failed = 0; + int nr_retry_pages = 0; + int pass = 0; + struct folio *folio, *folio2; + int rc, nr_pages; + + for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) { + retry = 0; + nr_retry_pages = 0; + + list_for_each_entry_safe(folio, folio2, from, lru) { + if (!folio_test_hugetlb(folio)) + continue; + + nr_pages = folio_nr_pages(folio); + + cond_resched(); + + rc = unmap_and_move_huge_page(get_new_page, + put_new_page, private, + &folio->page, pass > 2, mode, + reason, ret_folios); + /* + * The rules are: + * Success: hugetlb folio will be put back + * -EAGAIN: stay on the from list + * -ENOMEM: stay on the from list + * -ENOSYS: stay on the from list + * Other errno: put on ret_folios list + */ + switch(rc) { + case -ENOSYS: + /* Hugetlb migration is unsupported */ + nr_failed++; + stats->nr_failed_pages += nr_pages; + list_move_tail(&folio->lru, ret_folios); + break; + case -ENOMEM: + /* + * When memory is low, don't bother to try to migrate + * other folios, just exit. + */ + stats->nr_failed_pages += nr_pages + nr_retry_pages; + return -ENOMEM; + case -EAGAIN: + retry++; + nr_retry_pages += nr_pages; + break; + case MIGRATEPAGE_SUCCESS: + stats->nr_succeeded += nr_pages; + break; + default: + /* + * Permanent failure (-EBUSY, etc.): + * unlike -EAGAIN case, the failed folio is + * removed from migration folio list and not + * retried in the next outer loop. + */ + nr_failed++; + stats->nr_failed_pages += nr_pages; + break; + } + } + } + /* + * nr_failed is number of hugetlb folios failed to be migrated. After + * NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb + * folios as failed. + */ + nr_failed += retry; + stats->nr_failed_pages += nr_retry_pages; + + return nr_failed; +} + /* * migrate_pages - migrate the folios specified in a list, to the free folios * supplied as the target for the page migration @@ -1424,10 +1515,10 @@ struct migrate_pages_stats { * @ret_succeeded: Set to the number of folios migrated successfully if * the caller passes a non-NULL pointer. * - * The function returns after 10 attempts or if no folios are movable any more - * because the list has become empty or no retryable folios exist any more. - * It is caller's responsibility to call putback_movable_pages() to return folios - * to the LRU or free list only if ret != 0. + * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios + * are movable any more because the list has become empty or no retryable folios + * exist any more. It is caller's responsibility to call putback_movable_pages() + * only if ret != 0. * * Returns the number of {normal folio, large folio, hugetlb} that were not * migrated, or an error code. The number of large folio splits will be @@ -1441,7 +1532,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, int retry = 1; int large_retry = 1; int thp_retry = 1; - int nr_failed = 0; + int nr_failed; int nr_retry_pages = 0; int nr_large_failed = 0; int pass = 0; @@ -1458,38 +1549,45 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, trace_mm_migrate_pages_start(mode, reason); memset(&stats, 0, sizeof(stats)); + rc = migrate_hugetlbs(from, get_new_page, put_new_page, private, mode, reason, + &stats, &ret_folios); + if (rc < 0) + goto out; + nr_failed = rc; + split_folio_migration: - for (pass = 0; pass < 10 && (retry || large_retry); pass++) { + for (pass = 0; + pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); + pass++) { retry = 0; large_retry = 0; thp_retry = 0; nr_retry_pages = 0; list_for_each_entry_safe(folio, folio2, from, lru) { + /* Retried hugetlb folios will be kept in list */ + if (folio_test_hugetlb(folio)) { + list_move_tail(&folio->lru, &ret_folios); + continue; + } + /* * Large folio statistics is based on the source large * folio. Capture required information that might get * lost during migration. */ - is_large = folio_test_large(folio) && !folio_test_hugetlb(folio); + is_large = folio_test_large(folio); is_thp = is_large && folio_test_pmd_mappable(folio); nr_pages = folio_nr_pages(folio); + cond_resched(); - if (folio_test_hugetlb(folio)) - rc = unmap_and_move_huge_page(get_new_page, - put_new_page, private, - &folio->page, pass > 2, mode, - reason, - &ret_folios); - else - rc = unmap_and_move(get_new_page, put_new_page, - private, folio, pass > 2, mode, - reason, &ret_folios); + rc = unmap_and_move(get_new_page, put_new_page, + private, folio, pass > 2, mode, + reason, &ret_folios); /* * The rules are: - * Success: non hugetlb folio will be freed, hugetlb - * folio will be put back + * Success: folio will be freed * -EAGAIN: stay on the from list * -ENOMEM: stay on the from list * -ENOSYS: stay on the from list @@ -1516,7 +1614,6 @@ split_folio_migration: stats.nr_thp_split += is_thp; break; } - /* Hugetlb migration is unsupported */ } else if (!no_split_folio_counting) { nr_failed++; } @@ -1610,8 +1707,8 @@ split_folio_migration: */ if (!list_empty(&split_folios)) { /* - * Move non-migrated folios (after 10 retries) to ret_folios - * to avoid migrating them again. + * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY + * retries) to ret_folios to avoid migrating them again. */ list_splice_init(from, &ret_folios); list_splice_init(&split_folios, from); From f9e9725daffcbaafc815bc97101df84bd6f53ee5 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 13 Feb 2023 20:34:38 +0800 Subject: [PATCH 020/174] migrate_pages: restrict number of pages to migrate in batch [ Upstream commit 42012e0436d44aeb2e68f11a28ddd0ad3f38b61f ] This is a preparation patch to batch the folio unmapping and moving for non-hugetlb folios. If we had batched the folio unmapping, all folios to be migrated would be unmapped before copying the contents and flags of the folios. If the folios that were passed to migrate_pages() were too many in unit of pages, the execution of the processes would be stopped for too long time, thus too long latency. For example, migrate_pages() syscall will call migrate_pages() with all folios of a process. To avoid this possible issue, in this patch, we restrict the number of pages to be migrated to be no more than HPAGE_PMD_NR. That is, the influence is at the same level of THP migration. Link: https://lkml.kernel.org/r/20230213123444.155149-4-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Cc: Bharata B Rao Cc: Alistair Popple Cc: Xin Hao Cc: Minchan Kim Cc: Mike Kravetz Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- mm/migrate.c | 400 ++++++++++++++++++++++++++++----------------------- 1 file changed, 219 insertions(+), 181 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 70d0b20d06a5..40ae91e1a026 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1398,6 +1398,11 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f return rc; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR +#else +#define NR_MAX_BATCHED_MIGRATION 512 +#endif #define NR_MAX_MIGRATE_PAGES_RETRY 10 struct migrate_pages_stats { @@ -1499,6 +1504,186 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page, return nr_failed; } +static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason, struct list_head *ret_folios, + struct migrate_pages_stats *stats) +{ + int retry = 1; + int large_retry = 1; + int thp_retry = 1; + int nr_failed = 0; + int nr_retry_pages = 0; + int nr_large_failed = 0; + int pass = 0; + bool is_large = false; + bool is_thp = false; + struct folio *folio, *folio2; + int rc, nr_pages; + LIST_HEAD(split_folios); + bool nosplit = (reason == MR_NUMA_MISPLACED); + bool no_split_folio_counting = false; + +split_folio_migration: + for (pass = 0; + pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); + pass++) { + retry = 0; + large_retry = 0; + thp_retry = 0; + nr_retry_pages = 0; + + list_for_each_entry_safe(folio, folio2, from, lru) { + /* + * Large folio statistics is based on the source large + * folio. Capture required information that might get + * lost during migration. + */ + is_large = folio_test_large(folio); + is_thp = is_large && folio_test_pmd_mappable(folio); + nr_pages = folio_nr_pages(folio); + + cond_resched(); + + rc = unmap_and_move(get_new_page, put_new_page, + private, folio, pass > 2, mode, + reason, ret_folios); + /* + * The rules are: + * Success: folio will be freed + * -EAGAIN: stay on the from list + * -ENOMEM: stay on the from list + * -ENOSYS: stay on the from list + * Other errno: put on ret_folios list + */ + switch(rc) { + /* + * Large folio migration might be unsupported or + * the allocation could've failed so we should retry + * on the same folio with the large folio split + * to normal folios. + * + * Split folios are put in split_folios, and + * we will migrate them after the rest of the + * list is processed. + */ + case -ENOSYS: + /* Large folio migration is unsupported */ + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; + if (!try_split_folio(folio, &split_folios)) { + stats->nr_thp_split += is_thp; + break; + } + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages; + list_move_tail(&folio->lru, ret_folios); + break; + case -ENOMEM: + /* + * When memory is low, don't bother to try to migrate + * other folios, just exit. + */ + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; + /* Large folio NUMA faulting doesn't split to retry. */ + if (!nosplit) { + int ret = try_split_folio(folio, &split_folios); + + if (!ret) { + stats->nr_thp_split += is_thp; + break; + } else if (reason == MR_LONGTERM_PIN && + ret == -EAGAIN) { + /* + * Try again to split large folio to + * mitigate the failure of longterm pinning. + */ + large_retry++; + thp_retry += is_thp; + nr_retry_pages += nr_pages; + break; + } + } + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages + nr_retry_pages; + /* + * There might be some split folios of fail-to-migrate large + * folios left in split_folios list. Move them to ret_folios + * list so that they could be put back to the right list by + * the caller otherwise the folio refcnt will be leaked. + */ + list_splice_init(&split_folios, ret_folios); + /* nr_failed isn't updated for not used */ + nr_large_failed += large_retry; + stats->nr_thp_failed += thp_retry; + goto out; + case -EAGAIN: + if (is_large) { + large_retry++; + thp_retry += is_thp; + } else if (!no_split_folio_counting) { + retry++; + } + nr_retry_pages += nr_pages; + break; + case MIGRATEPAGE_SUCCESS: + stats->nr_succeeded += nr_pages; + stats->nr_thp_succeeded += is_thp; + break; + default: + /* + * Permanent failure (-EBUSY, etc.): + * unlike -EAGAIN case, the failed folio is + * removed from migration folio list and not + * retried in the next outer loop. + */ + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages; + break; + } + } + } + nr_failed += retry; + nr_large_failed += large_retry; + stats->nr_thp_failed += thp_retry; + stats->nr_failed_pages += nr_retry_pages; + /* + * Try to migrate split folios of fail-to-migrate large folios, no + * nr_failed counting in this round, since all split folios of a + * large folio is counted as 1 failure in the first round. + */ + if (!list_empty(&split_folios)) { + /* + * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY + * retries) to ret_folios to avoid migrating them again. + */ + list_splice_init(from, ret_folios); + list_splice_init(&split_folios, from); + no_split_folio_counting = true; + retry = 1; + goto split_folio_migration; + } + + rc = nr_failed + nr_large_failed; +out: + return rc; +} + /* * migrate_pages - migrate the folios specified in a list, to the free folios * supplied as the target for the page migration @@ -1529,195 +1714,48 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { - int retry = 1; - int large_retry = 1; - int thp_retry = 1; - int nr_failed; - int nr_retry_pages = 0; - int nr_large_failed = 0; - int pass = 0; - bool is_large = false; - bool is_thp = false; + int rc, rc_gather; + int nr_pages; struct folio *folio, *folio2; - int rc, nr_pages; + LIST_HEAD(folios); LIST_HEAD(ret_folios); - LIST_HEAD(split_folios); - bool nosplit = (reason == MR_NUMA_MISPLACED); - bool no_split_folio_counting = false; struct migrate_pages_stats stats; trace_mm_migrate_pages_start(mode, reason); memset(&stats, 0, sizeof(stats)); - rc = migrate_hugetlbs(from, get_new_page, put_new_page, private, mode, reason, - &stats, &ret_folios); - if (rc < 0) + + rc_gather = migrate_hugetlbs(from, get_new_page, put_new_page, private, + mode, reason, &stats, &ret_folios); + if (rc_gather < 0) goto out; - nr_failed = rc; - -split_folio_migration: - for (pass = 0; - pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); - pass++) { - retry = 0; - large_retry = 0; - thp_retry = 0; - nr_retry_pages = 0; - - list_for_each_entry_safe(folio, folio2, from, lru) { - /* Retried hugetlb folios will be kept in list */ - if (folio_test_hugetlb(folio)) { - list_move_tail(&folio->lru, &ret_folios); - continue; - } - - /* - * Large folio statistics is based on the source large - * folio. Capture required information that might get - * lost during migration. - */ - is_large = folio_test_large(folio); - is_thp = is_large && folio_test_pmd_mappable(folio); - nr_pages = folio_nr_pages(folio); - - cond_resched(); - - rc = unmap_and_move(get_new_page, put_new_page, - private, folio, pass > 2, mode, - reason, &ret_folios); - /* - * The rules are: - * Success: folio will be freed - * -EAGAIN: stay on the from list - * -ENOMEM: stay on the from list - * -ENOSYS: stay on the from list - * Other errno: put on ret_folios list then splice to - * from list - */ - switch(rc) { - /* - * Large folio migration might be unsupported or - * the allocation could've failed so we should retry - * on the same folio with the large folio split - * to normal folios. - * - * Split folios are put in split_folios, and - * we will migrate them after the rest of the - * list is processed. - */ - case -ENOSYS: - /* Large folio migration is unsupported */ - if (is_large) { - nr_large_failed++; - stats.nr_thp_failed += is_thp; - if (!try_split_folio(folio, &split_folios)) { - stats.nr_thp_split += is_thp; - break; - } - } else if (!no_split_folio_counting) { - nr_failed++; - } - - stats.nr_failed_pages += nr_pages; - list_move_tail(&folio->lru, &ret_folios); - break; - case -ENOMEM: - /* - * When memory is low, don't bother to try to migrate - * other folios, just exit. - */ - if (is_large) { - nr_large_failed++; - stats.nr_thp_failed += is_thp; - /* Large folio NUMA faulting doesn't split to retry. */ - if (!nosplit) { - int ret = try_split_folio(folio, &split_folios); - - if (!ret) { - stats.nr_thp_split += is_thp; - break; - } else if (reason == MR_LONGTERM_PIN && - ret == -EAGAIN) { - /* - * Try again to split large folio to - * mitigate the failure of longterm pinning. - */ - large_retry++; - thp_retry += is_thp; - nr_retry_pages += nr_pages; - break; - } - } - } else if (!no_split_folio_counting) { - nr_failed++; - } - - stats.nr_failed_pages += nr_pages + nr_retry_pages; - /* - * There might be some split folios of fail-to-migrate large - * folios left in split_folios list. Move them back to migration - * list so that they could be put back to the right list by - * the caller otherwise the folio refcnt will be leaked. - */ - list_splice_init(&split_folios, from); - /* nr_failed isn't updated for not used */ - nr_large_failed += large_retry; - stats.nr_thp_failed += thp_retry; - goto out; - case -EAGAIN: - if (is_large) { - large_retry++; - thp_retry += is_thp; - } else if (!no_split_folio_counting) { - retry++; - } - nr_retry_pages += nr_pages; - break; - case MIGRATEPAGE_SUCCESS: - stats.nr_succeeded += nr_pages; - stats.nr_thp_succeeded += is_thp; - break; - default: - /* - * Permanent failure (-EBUSY, etc.): - * unlike -EAGAIN case, the failed folio is - * removed from migration folio list and not - * retried in the next outer loop. - */ - if (is_large) { - nr_large_failed++; - stats.nr_thp_failed += is_thp; - } else if (!no_split_folio_counting) { - nr_failed++; - } - - stats.nr_failed_pages += nr_pages; - break; - } +again: + nr_pages = 0; + list_for_each_entry_safe(folio, folio2, from, lru) { + /* Retried hugetlb folios will be kept in list */ + if (folio_test_hugetlb(folio)) { + list_move_tail(&folio->lru, &ret_folios); + continue; } - } - nr_failed += retry; - nr_large_failed += large_retry; - stats.nr_thp_failed += thp_retry; - stats.nr_failed_pages += nr_retry_pages; - /* - * Try to migrate split folios of fail-to-migrate large folios, no - * nr_failed counting in this round, since all split folios of a - * large folio is counted as 1 failure in the first round. - */ - if (!list_empty(&split_folios)) { - /* - * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY - * retries) to ret_folios to avoid migrating them again. - */ - list_splice_init(from, &ret_folios); - list_splice_init(&split_folios, from); - no_split_folio_counting = true; - retry = 1; - goto split_folio_migration; - } - rc = nr_failed + nr_large_failed; + nr_pages += folio_nr_pages(folio); + if (nr_pages > NR_MAX_BATCHED_MIGRATION) + break; + } + if (nr_pages > NR_MAX_BATCHED_MIGRATION) + list_cut_before(&folios, from, &folio->lru); + else + list_splice_init(from, &folios); + rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private, + mode, reason, &ret_folios, &stats); + list_splice_tail_init(&folios, &ret_folios); + if (rc < 0) { + rc_gather = rc; + goto out; + } + rc_gather += rc; + if (!list_empty(from)) + goto again; out: /* * Put the permanent failure folio back to migration list, they @@ -1730,7 +1768,7 @@ out: * are migrated successfully. */ if (list_empty(from)) - rc = 0; + rc_gather = 0; count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded); count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages); @@ -1744,7 +1782,7 @@ out: if (ret_succeeded) *ret_succeeded = stats.nr_succeeded; - return rc; + return rc_gather; } struct page *alloc_migration_target(struct page *page, unsigned long private) From 8ca5f0ea52e1eef5e7dcbdff1d1afe602764ea93 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 13 Feb 2023 20:34:39 +0800 Subject: [PATCH 021/174] migrate_pages: split unmap_and_move() to _unmap() and _move() [ Upstream commit 64c8902ed4418317cd416c566f896bd4a92b2efc ] This is a preparation patch to batch the folio unmapping and moving. In this patch, unmap_and_move() is split to migrate_folio_unmap() and migrate_folio_move(). So, we can batch _unmap() and _move() in different loops later. To pass some information between unmap and move, the original unused dst->mapping and dst->private are used. Link: https://lkml.kernel.org/r/20230213123444.155149-5-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Reviewed-by: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Cc: Bharata B Rao Cc: Alistair Popple Cc: Minchan Kim Cc: Mike Kravetz Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages") Signed-off-by: Sasha Levin --- include/linux/migrate.h | 1 + mm/migrate.c | 169 ++++++++++++++++++++++++++++++---------- 2 files changed, 129 insertions(+), 41 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3ef77f52a4f0..7376074f2e1e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -18,6 +18,7 @@ struct migration_target_control; * - zero on page migration success; */ #define MIGRATEPAGE_SUCCESS 0 +#define MIGRATEPAGE_UNMAP 1 /** * struct movable_operations - Driver page migration diff --git a/mm/migrate.c b/mm/migrate.c index 40ae91e1a026..46a1476e188c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1011,11 +1011,53 @@ out: return rc; } -static int __unmap_and_move(struct folio *src, struct folio *dst, +/* + * To record some information during migration, we use some unused + * fields (mapping and private) of struct folio of the newly allocated + * destination folio. This is safe because nobody is using them + * except us. + */ +static void __migrate_folio_record(struct folio *dst, + unsigned long page_was_mapped, + struct anon_vma *anon_vma) +{ + dst->mapping = (void *)anon_vma; + dst->private = (void *)page_was_mapped; +} + +static void __migrate_folio_extract(struct folio *dst, + int *page_was_mappedp, + struct anon_vma **anon_vmap) +{ + *anon_vmap = (void *)dst->mapping; + *page_was_mappedp = (unsigned long)dst->private; + dst->mapping = NULL; + dst->private = NULL; +} + +/* Cleanup src folio upon migration success */ +static void migrate_folio_done(struct folio *src, + enum migrate_reason reason) +{ + /* + * Compaction can migrate also non-LRU pages which are + * not accounted to NR_ISOLATED_*. They can be recognized + * as __PageMovable + */ + if (likely(!__folio_test_movable(src))) + mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + + folio_is_file_lru(src), -folio_nr_pages(src)); + + if (reason != MR_MEMORY_FAILURE) + /* We release the page in page_handle_poison. */ + folio_put(src); +} + +static int __migrate_folio_unmap(struct folio *src, struct folio *dst, int force, enum migrate_mode mode) { int rc = -EAGAIN; - bool page_was_mapped = false; + int page_was_mapped = 0; struct anon_vma *anon_vma = NULL; bool is_lru = !__PageMovable(&src->page); @@ -1091,8 +1133,8 @@ static int __unmap_and_move(struct folio *src, struct folio *dst, goto out_unlock; if (unlikely(!is_lru)) { - rc = move_to_new_folio(dst, src, mode); - goto out_unlock_both; + __migrate_folio_record(dst, page_was_mapped, anon_vma); + return MIGRATEPAGE_UNMAP; } /* @@ -1117,11 +1159,42 @@ static int __unmap_and_move(struct folio *src, struct folio *dst, VM_BUG_ON_FOLIO(folio_test_anon(src) && !folio_test_ksm(src) && !anon_vma, src); try_to_migrate(src, 0); - page_was_mapped = true; + page_was_mapped = 1; } - if (!folio_mapped(src)) - rc = move_to_new_folio(dst, src, mode); + if (!folio_mapped(src)) { + __migrate_folio_record(dst, page_was_mapped, anon_vma); + return MIGRATEPAGE_UNMAP; + } + + if (page_was_mapped) + remove_migration_ptes(src, src, false); + +out_unlock_both: + folio_unlock(dst); +out_unlock: + /* Drop an anon_vma reference if we took one */ + if (anon_vma) + put_anon_vma(anon_vma); + folio_unlock(src); +out: + + return rc; +} + +static int __migrate_folio_move(struct folio *src, struct folio *dst, + enum migrate_mode mode) +{ + int rc; + int page_was_mapped = 0; + struct anon_vma *anon_vma = NULL; + bool is_lru = !__PageMovable(&src->page); + + __migrate_folio_extract(dst, &page_was_mapped, &anon_vma); + + rc = move_to_new_folio(dst, src, mode); + if (unlikely(!is_lru)) + goto out_unlock_both; /* * When successful, push dst to LRU immediately: so that if it @@ -1144,12 +1217,10 @@ static int __unmap_and_move(struct folio *src, struct folio *dst, out_unlock_both: folio_unlock(dst); -out_unlock: /* Drop an anon_vma reference if we took one */ if (anon_vma) put_anon_vma(anon_vma); folio_unlock(src); -out: /* * If migration is successful, decrease refcount of dst, * which will not free the page because new page owner increased @@ -1161,19 +1232,15 @@ out: return rc; } -/* - * Obtain the lock on folio, remove all ptes and migrate the folio - * to the newly allocated folio in dst. - */ -static int unmap_and_move(new_page_t get_new_page, - free_page_t put_new_page, - unsigned long private, struct folio *src, - int force, enum migrate_mode mode, - enum migrate_reason reason, - struct list_head *ret) +/* Obtain the lock on page, remove all ptes. */ +static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page, + unsigned long private, struct folio *src, + struct folio **dstp, int force, + enum migrate_mode mode, enum migrate_reason reason, + struct list_head *ret) { struct folio *dst; - int rc = MIGRATEPAGE_SUCCESS; + int rc = MIGRATEPAGE_UNMAP; struct page *newpage = NULL; if (!thp_migration_supported() && folio_test_transhuge(src)) @@ -1184,20 +1251,49 @@ static int unmap_and_move(new_page_t get_new_page, folio_clear_active(src); folio_clear_unevictable(src); /* free_pages_prepare() will clear PG_isolated. */ - goto out; + list_del(&src->lru); + migrate_folio_done(src, reason); + return MIGRATEPAGE_SUCCESS; } newpage = get_new_page(&src->page, private); if (!newpage) return -ENOMEM; dst = page_folio(newpage); + *dstp = dst; dst->private = NULL; - rc = __unmap_and_move(src, dst, force, mode); + rc = __migrate_folio_unmap(src, dst, force, mode); + if (rc == MIGRATEPAGE_UNMAP) + return rc; + + /* + * A folio that has not been unmapped will be restored to + * right list unless we want to retry. + */ + if (rc != -EAGAIN) + list_move_tail(&src->lru, ret); + + if (put_new_page) + put_new_page(&dst->page, private); + else + folio_put(dst); + + return rc; +} + +/* Migrate the folio to the newly allocated folio in dst. */ +static int migrate_folio_move(free_page_t put_new_page, unsigned long private, + struct folio *src, struct folio *dst, + enum migrate_mode mode, enum migrate_reason reason, + struct list_head *ret) +{ + int rc; + + rc = __migrate_folio_move(src, dst, mode); if (rc == MIGRATEPAGE_SUCCESS) set_page_owner_migrate_reason(&dst->page, reason); -out: if (rc != -EAGAIN) { /* * A folio that has been migrated has all references @@ -1213,20 +1309,7 @@ out: * we want to retry. */ if (rc == MIGRATEPAGE_SUCCESS) { - /* - * Compaction can migrate also non-LRU folios which are - * not accounted to NR_ISOLATED_*. They can be recognized - * as __folio_test_movable - */ - if (likely(!__folio_test_movable(src))) - mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + - folio_is_file_lru(src), -folio_nr_pages(src)); - - if (reason != MR_MEMORY_FAILURE) - /* - * We release the folio in page_handle_poison. - */ - folio_put(src); + migrate_folio_done(src, reason); } else { if (rc != -EAGAIN) list_add_tail(&src->lru, ret); @@ -1518,7 +1601,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, int pass = 0; bool is_large = false; bool is_thp = false; - struct folio *folio, *folio2; + struct folio *folio, *folio2, *dst = NULL; int rc, nr_pages; LIST_HEAD(split_folios); bool nosplit = (reason == MR_NUMA_MISPLACED); @@ -1545,9 +1628,13 @@ split_folio_migration: cond_resched(); - rc = unmap_and_move(get_new_page, put_new_page, - private, folio, pass > 2, mode, - reason, ret_folios); + rc = migrate_folio_unmap(get_new_page, put_new_page, private, + folio, &dst, pass > 2, mode, + reason, ret_folios); + if (rc == MIGRATEPAGE_UNMAP) + rc = migrate_folio_move(put_new_page, private, + folio, dst, mode, + reason, ret_folios); /* * The rules are: * Success: folio will be freed From 79a727a9b88c5b390a4c54e5868473e83031d350 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 25 Oct 2024 10:17:24 -0400 Subject: [PATCH 022/174] vmscan,migrate: fix page count imbalance on node stats when demoting pages [ Upstream commit 35e41024c4c2b02ef8207f61b9004f6956cf037b ] When numa balancing is enabled with demotion, vmscan will call migrate_pages when shrinking LRUs. migrate_pages will decrement the the node's isolated page count, leading to an imbalanced count when invoked from (MG)LRU code. The result is dmesg output like such: $ cat /proc/sys/vm/stat_refresh [77383.088417] vmstat_refresh: nr_isolated_anon -103212 [77383.088417] vmstat_refresh: nr_isolated_file -899642 This negative value may impact compaction and reclaim throttling. The following path produces the decrement: shrink_folio_list demote_folio_list migrate_pages migrate_pages_batch migrate_folio_move migrate_folio_done mod_node_page_state(-ve) <- decrement This path happens for SUCCESSFUL migrations, not failures. Typically callers to migrate_pages are required to handle putback/accounting for failures, but this is already handled in the shrink code. When accounting for migrations, instead do not decrement the count when the migration reason is MR_DEMOTION. As of v6.11, this demotion logic is the only source of MR_DEMOTION. Link: https://lkml.kernel.org/r/20241025141724.17927-1-gourry@gourry.net Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim") Signed-off-by: Gregory Price Reviewed-by: Yang Shi Reviewed-by: Davidlohr Bueso Reviewed-by: Shakeel Butt Reviewed-by: "Huang, Ying" Reviewed-by: Oscar Salvador Cc: Dave Hansen Cc: Wei Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 46a1476e188c..9ff5d77b61a3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1044,7 +1044,7 @@ static void migrate_folio_done(struct folio *src, * not accounted to NR_ISOLATED_*. They can be recognized * as __PageMovable */ - if (likely(!__folio_test_movable(src))) + if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION) mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + folio_is_file_lru(src), -folio_nr_pages(src)); From 9f5a834715d04f32cb8a8d25fe329d67e7d16e46 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 10 Apr 2024 02:26:54 +0100 Subject: [PATCH 023/174] io_uring: always lock __io_cqring_overflow_flush Commit 8d09a88ef9d3cb7d21d45c39b7b7c31298d23998 upstream. Conditional locking is never great, in case of __io_cqring_overflow_flush(), which is a slow path, it's not justified. Don't handle IOPOLL separately, always grab uring_lock for overflow flushing. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/162947df299aa12693ac4b305dacedab32ec7976.1712708261.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f902b161f02c..92c1aa8f3501 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -593,6 +593,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) bool all_flushed; size_t cqe_size = sizeof(struct io_uring_cqe); + lockdep_assert_held(&ctx->uring_lock); + if (!force && __io_cqring_events(ctx) == ctx->cq_entries) return false; @@ -647,12 +649,9 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) bool ret = true; if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->uring_lock); ret = __io_cqring_overflow_flush(ctx, false); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); + mutex_unlock(&ctx->uring_lock); } return ret; @@ -1405,6 +1404,8 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) int ret = 0; unsigned long check_cq; + lockdep_assert_held(&ctx->uring_lock); + if (!io_allowed_run_tw(ctx)) return -EEXIST; From 38c5fe74f3bef98f75d16effa49836d50c9b6097 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 26 Sep 2024 09:10:31 -0700 Subject: [PATCH 024/174] x86/bugs: Use code segment selector for VERW operand commit e4d2102018542e3ae5e297bc6e229303abff8a0f upstream. Robert Gill reported below #GP in 32-bit mode when dosemu software was executing vm86() system call: general protection fault: 0000 [#1] PREEMPT SMP CPU: 4 PID: 4610 Comm: dosemu.bin Not tainted 6.6.21-gentoo-x86 #1 Hardware name: Dell Inc. PowerEdge 1950/0H723K, BIOS 2.7.0 10/30/2010 EIP: restore_all_switch_stack+0xbe/0xcf EAX: 00000000 EBX: 00000000 ECX: 00000000 EDX: 00000000 ESI: 00000000 EDI: 00000000 EBP: 00000000 ESP: ff8affdc DS: 0000 ES: 0000 FS: 0000 GS: 0033 SS: 0068 EFLAGS: 00010046 CR0: 80050033 CR2: 00c2101c CR3: 04b6d000 CR4: 000406d0 Call Trace: show_regs+0x70/0x78 die_addr+0x29/0x70 exc_general_protection+0x13c/0x348 exc_bounds+0x98/0x98 handle_exception+0x14d/0x14d exc_bounds+0x98/0x98 restore_all_switch_stack+0xbe/0xcf exc_bounds+0x98/0x98 restore_all_switch_stack+0xbe/0xcf This only happens in 32-bit mode when VERW based mitigations like MDS/RFDS are enabled. This is because segment registers with an arbitrary user value can result in #GP when executing VERW. Intel SDM vol. 2C documents the following behavior for VERW instruction: #GP(0) - If a memory operand effective address is outside the CS, DS, ES, FS, or GS segment limit. CLEAR_CPU_BUFFERS macro executes VERW instruction before returning to user space. Use %cs selector to reference VERW operand. This ensures VERW will not #GP for an arbitrary user %ds. [ mingo: Fixed the SOB chain. ] Fixes: a0e2dab44d22 ("x86/entry_32: Add VERW just before userspace transition") Reported-by: Robert Gill Reviewed-by: Andrew Cooper Suggested-by: Brian Gerst Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1e481d308e18..daf58a96e0a7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -211,7 +211,16 @@ */ .macro CLEAR_CPU_BUFFERS ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF - verw _ASM_RIP(mds_verw_sel) +#ifdef CONFIG_X86_64 + verw mds_verw_sel(%rip) +#else + /* + * In 32bit mode, the memory operand must be a %cs reference. The data + * segments may not be usable (vm86 mode), and the stack segment may not + * be flat (ESPFIX32). + */ + verw %cs:mds_verw_sel +#endif .Lskip_verw_\@: .endm From 0acaf4a5025d6dafb7da787d2d4c47ed95e46ed6 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Mon, 17 Jun 2024 19:52:17 +0800 Subject: [PATCH 025/174] wifi: mac80211: fix NULL dereference at band check in starting tx ba session commit 021d53a3d87eeb9dbba524ac515651242a2a7e3b upstream. In MLD connection, link_data/link_conf are dynamically allocated. They don't point to vif->bss_conf. So, there will be no chanreq assigned to vif->bss_conf and then the chan will be NULL. Tweak the code to check ht_supported/vht_supported/has_he/has_eht on sta deflink. Crash log (with rtw89 version under MLO development): [ 9890.526087] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 9890.526102] #PF: supervisor read access in kernel mode [ 9890.526105] #PF: error_code(0x0000) - not-present page [ 9890.526109] PGD 0 P4D 0 [ 9890.526114] Oops: 0000 [#1] PREEMPT SMP PTI [ 9890.526119] CPU: 2 PID: 6367 Comm: kworker/u16:2 Kdump: loaded Tainted: G OE 6.9.0 #1 [ 9890.526123] Hardware name: LENOVO 2356AD1/2356AD1, BIOS G7ETB3WW (2.73 ) 11/28/2018 [ 9890.526126] Workqueue: phy2 rtw89_core_ba_work [rtw89_core] [ 9890.526203] RIP: 0010:ieee80211_start_tx_ba_session (net/mac80211/agg-tx.c:618 (discriminator 1)) mac80211 [ 9890.526279] Code: f7 e8 d5 93 3e ea 48 83 c4 28 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 49 8b 84 24 e0 f1 ff ff 48 8b 80 90 1b 00 00 <83> 38 03 0f 84 37 fe ff ff bb ea ff ff ff eb cc 49 8b 84 24 10 f3 All code ======== 0: f7 e8 imul %eax 2: d5 (bad) 3: 93 xchg %eax,%ebx 4: 3e ea ds (bad) 6: 48 83 c4 28 add $0x28,%rsp a: 89 d8 mov %ebx,%eax c: 5b pop %rbx d: 41 5c pop %r12 f: 41 5d pop %r13 11: 41 5e pop %r14 13: 41 5f pop %r15 15: 5d pop %rbp 16: c3 retq 17: cc int3 18: cc int3 19: cc int3 1a: cc int3 1b: 49 8b 84 24 e0 f1 ff mov -0xe20(%r12),%rax 22: ff 23: 48 8b 80 90 1b 00 00 mov 0x1b90(%rax),%rax 2a:* 83 38 03 cmpl $0x3,(%rax) <-- trapping instruction 2d: 0f 84 37 fe ff ff je 0xfffffffffffffe6a 33: bb ea ff ff ff mov $0xffffffea,%ebx 38: eb cc jmp 0x6 3a: 49 rex.WB 3b: 8b .byte 0x8b 3c: 84 24 10 test %ah,(%rax,%rdx,1) 3f: f3 repz Code starting with the faulting instruction =========================================== 0: 83 38 03 cmpl $0x3,(%rax) 3: 0f 84 37 fe ff ff je 0xfffffffffffffe40 9: bb ea ff ff ff mov $0xffffffea,%ebx e: eb cc jmp 0xffffffffffffffdc 10: 49 rex.WB 11: 8b .byte 0x8b 12: 84 24 10 test %ah,(%rax,%rdx,1) 15: f3 repz [ 9890.526285] RSP: 0018:ffffb8db09013d68 EFLAGS: 00010246 [ 9890.526291] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff9308e0d656c8 [ 9890.526295] RDX: 0000000000000000 RSI: ffffffffab99460b RDI: ffffffffab9a7685 [ 9890.526300] RBP: ffffb8db09013db8 R08: 0000000000000000 R09: 0000000000000873 [ 9890.526304] R10: ffff9308e0d64800 R11: 0000000000000002 R12: ffff9308e5ff6e70 [ 9890.526308] R13: ffff930952500e20 R14: ffff9309192a8c00 R15: 0000000000000000 [ 9890.526313] FS: 0000000000000000(0000) GS:ffff930b4e700000(0000) knlGS:0000000000000000 [ 9890.526316] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9890.526318] CR2: 0000000000000000 CR3: 0000000391c58005 CR4: 00000000001706f0 [ 9890.526321] Call Trace: [ 9890.526324] [ 9890.526327] ? show_regs (arch/x86/kernel/dumpstack.c:479) [ 9890.526335] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 9890.526340] ? page_fault_oops (arch/x86/mm/fault.c:713) [ 9890.526347] ? search_module_extables (kernel/module/main.c:3256 (discriminator 3)) [ 9890.526353] ? ieee80211_start_tx_ba_session (net/mac80211/agg-tx.c:618 (discriminator 1)) mac80211 Signed-off-by: Zong-Zhe Yang Link: https://patch.msgid.link/20240617115217.22344-1-kevin_yang@realtek.com Signed-off-by: Johannes Berg Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index e26a72f3a104..1241ab7a86bb 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -593,7 +593,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (!pubsta->deflink.ht_cap.ht_supported && - sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) + !pubsta->deflink.vht_cap.vht_supported && + !pubsta->deflink.he_cap.has_he && + !pubsta->deflink.eht_cap.has_eht) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) From cd0cdb51b15203fa27d4b714be83b7dfffa0b752 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 18 Oct 2024 04:33:10 +0900 Subject: [PATCH 026/174] nilfs2: fix kernel bug due to missing clearing of checked flag commit 41e192ad2779cae0102879612dfe46726e4396aa upstream. Syzbot reported that in directory operations after nilfs2 detects filesystem corruption and degrades to read-only, __block_write_begin_int(), which is called to prepare block writes, may fail the BUG_ON check for accesses exceeding the folio/page size, triggering a kernel bug. This was found to be because the "checked" flag of a page/folio was not cleared when it was discarded by nilfs2's own routine, which causes the sanity check of directory entries to be skipped when the directory page/folio is reloaded. So, fix that. This was necessary when the use of nilfs2's own page discard routine was applied to more than just metadata files. Link: https://lkml.kernel.org/r/20241017193359.5051-1-konishi.ryusuke@gmail.com Fixes: 8c26c4e2694a ("nilfs2: fix issue with flush kernel thread after remount in RO mode because of driver's internal error or metadata corruption") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+d6ca2daf692c7a82f959@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d6ca2daf692c7a82f959 Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/page.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 19bc8eea2b35..6bc4cda804e1 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -404,6 +404,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) ClearPageUptodate(page); ClearPageMappedToDisk(page); + ClearPageChecked(page); if (page_has_buffers(page)) { struct buffer_head *bh, *head; From cde8a7eb5c6762264ff0f4433358e0a0d250c875 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Oct 2024 09:17:44 +0200 Subject: [PATCH 027/174] wifi: iwlwifi: mvm: fix 6 GHz scan construction commit 7245012f0f496162dd95d888ed2ceb5a35170f1a upstream. If more than 255 colocated APs exist for the set of all APs found during 2.4/5 GHz scanning, then the 6 GHz scan construction will loop forever since the loop variable has type u8, which can never reach the number found when that's bigger than 255, and is stored in a u32 variable. Also move it into the loops to have a smaller scope. Using a u32 there is fine, we limit the number of APs in the scan list and each has a limit on the number of RNR entries due to the frame size. With a limit of 1000 scan results, a frame size upper bound of 4096 (really it's more like ~2300) and a TBTT entry size of at least 11, we get an upper bound for the number of ~372k, well in the bounds of a u32. Cc: stable@vger.kernel.org Fixes: eae94cf82d74 ("iwlwifi: mvm: add support for 6GHz") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219375 Link: https://patch.msgid.link/20241023091744.f4baed5c08a1.I8b417148bbc8c5d11c101e1b8f5bf372e17bf2a7@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 1785fded6290..2a4c59c71448 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1739,7 +1739,8 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm *mvm, &cp->channel_config[ch_cnt]; u32 s_ssid_bitmap = 0, bssid_bitmap = 0, flags = 0; - u8 j, k, s_max = 0, b_max = 0, n_used_bssid_entries; + u8 k, s_max = 0, b_max = 0, n_used_bssid_entries; + u32 j; bool force_passive, found = false, allow_passive = true, unsolicited_probe_on_chan = false, psc_no_listen = false; From 82cae1e30bd940253593c2d4f16d88343d1358f4 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Mon, 9 Sep 2024 21:35:58 +0900 Subject: [PATCH 028/174] mm: shmem: fix data-race in shmem_getattr() commit d949d1d14fa281ace388b1de978e8f2cd52875cf upstream. I got the following KCSAN report during syzbot testing: ================================================================== BUG: KCSAN: data-race in generic_fillattr / inode_set_ctime_current write to 0xffff888102eb3260 of 4 bytes by task 6565 on cpu 1: inode_set_ctime_to_ts include/linux/fs.h:1638 [inline] inode_set_ctime_current+0x169/0x1d0 fs/inode.c:2626 shmem_mknod+0x117/0x180 mm/shmem.c:3443 shmem_create+0x34/0x40 mm/shmem.c:3497 lookup_open fs/namei.c:3578 [inline] open_last_lookups fs/namei.c:3647 [inline] path_openat+0xdbc/0x1f00 fs/namei.c:3883 do_filp_open+0xf7/0x200 fs/namei.c:3913 do_sys_openat2+0xab/0x120 fs/open.c:1416 do_sys_open fs/open.c:1431 [inline] __do_sys_openat fs/open.c:1447 [inline] __se_sys_openat fs/open.c:1442 [inline] __x64_sys_openat+0xf3/0x120 fs/open.c:1442 x64_sys_call+0x1025/0x2d60 arch/x86/include/generated/asm/syscalls_64.h:258 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x54/0x120 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x76/0x7e read to 0xffff888102eb3260 of 4 bytes by task 3498 on cpu 0: inode_get_ctime_nsec include/linux/fs.h:1623 [inline] inode_get_ctime include/linux/fs.h:1629 [inline] generic_fillattr+0x1dd/0x2f0 fs/stat.c:62 shmem_getattr+0x17b/0x200 mm/shmem.c:1157 vfs_getattr_nosec fs/stat.c:166 [inline] vfs_getattr+0x19b/0x1e0 fs/stat.c:207 vfs_statx_path fs/stat.c:251 [inline] vfs_statx+0x134/0x2f0 fs/stat.c:315 vfs_fstatat+0xec/0x110 fs/stat.c:341 __do_sys_newfstatat fs/stat.c:505 [inline] __se_sys_newfstatat+0x58/0x260 fs/stat.c:499 __x64_sys_newfstatat+0x55/0x70 fs/stat.c:499 x64_sys_call+0x141f/0x2d60 arch/x86/include/generated/asm/syscalls_64.h:263 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x54/0x120 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x76/0x7e value changed: 0x2755ae53 -> 0x27ee44d3 Reported by Kernel Concurrency Sanitizer on: CPU: 0 UID: 0 PID: 3498 Comm: udevd Not tainted 6.11.0-rc6-syzkaller-00326-gd1f2d51b711a-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 ================================================================== When calling generic_fillattr(), if you don't hold read lock, data-race will occur in inode member variables, which can cause unexpected behavior. Since there is no special protection when shmem_getattr() calls generic_fillattr(), data-race occurs by functions such as shmem_unlink() or shmem_mknod(). This can cause unexpected results, so commenting it out is not enough. Therefore, when calling generic_fillattr() from shmem_getattr(), it is appropriate to protect the inode using inode_lock_shared() and inode_unlock_shared() to prevent data-race. Link: https://lkml.kernel.org/r/20240909123558.70229-1-aha310510@gmail.com Fixes: 44a30220bc0a ("shmem: recalculate file inode when fstat") Signed-off-by: Jeongjun Park Reported-by: syzbot Cc: Hugh Dickins Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index f7c08e169e42..0e1fbc53717d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1086,7 +1086,9 @@ static int shmem_getattr(struct user_namespace *mnt_userns, stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); + inode_lock_shared(inode); generic_fillattr(&init_user_ns, inode, stat); + inode_unlock_shared(inode); if (shmem_is_huge(NULL, inode, 0, false)) stat->blksize = HPAGE_PMD_SIZE; From 1c2f04ce028ec934ecbaee10763ee999f2407555 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 2 Nov 2024 11:36:16 +0800 Subject: [PATCH 029/174] LoongArch: Fix build errors due to backported TIMENS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit eb3710efffce1dcff83761db4615f91d93aabfcb ("LoongArch: Add support to clone a time namespace") backports the TIMENS support for LoongArch (corresponding upstream commit aa5e65dc0818bbf676bf06927368ec46867778fd) but causes build errors: CC arch/loongarch/kernel/vdso.o arch/loongarch/kernel/vdso.c: In function ‘vvar_fault’: arch/loongarch/kernel/vdso.c:54:36: error: implicit declaration of function ‘find_timens_vvar_page’ [-Werror=implicit-function-declaration] 54 | struct page *timens_page = find_timens_vvar_page(vma); | ^~~~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/vdso.c:54:36: warning: initialization of ‘struct page *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] arch/loongarch/kernel/vdso.c: In function ‘vdso_join_timens’: arch/loongarch/kernel/vdso.c:143:25: error: implicit declaration of function ‘zap_vma_pages’; did you mean ‘zap_vma_ptes’? [-Werror=implicit-function-declaration] 143 | zap_vma_pages(vma); | ^~~~~~~~~~~~~ | zap_vma_ptes cc1: some warnings being treated as errors Because in 6.1.y we should define find_timens_vvar_page() by ourselves and use zap_page_range() instead of zap_vma_pages(), so fix it. Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/vdso.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 59aa9dd466e8..64eb5386e7b2 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -40,6 +40,8 @@ static struct page *vdso_pages[] = { NULL }; struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; +static struct page *find_timens_vvar_page(struct vm_area_struct *vma); + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { current->mm->context.vdso = (void *)(new_vma->vm_start); @@ -139,13 +141,37 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_lock(mm); for_each_vma(vmi, vma) { + unsigned long size = vma->vm_end - vma->vm_start; + if (vma_is_special_mapping(vma, &vdso_info.data_mapping)) - zap_vma_pages(vma); + zap_page_range(vma, vma->vm_start, size); } mmap_read_unlock(mm); return 0; } + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} #endif static unsigned long vdso_base(void) From a207af9bfe76bdc97c14ce58b31759d74440cf8e Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 21 Jun 2024 14:09:29 +0200 Subject: [PATCH 030/174] mtd: spi-nor: winbond: fix w25q128 regression commit d35df77707bf5ae1221b5ba1c8a88cf4fcdd4901 upstream. Commit 83e824a4a595 ("mtd: spi-nor: Correct flags for Winbond w25q128") removed the flags for non-SFDP devices. It was assumed that it wasn't in use anymore. This wasn't true. Add the no_sfdp_flags as well as the size again. We add the additional flags for dual and quad read because they have been reported to work properly by Hartmut using both older and newer versions of this flash, the similar flashes with 64Mbit and 256Mbit already have these flags and because it will (luckily) trigger our legacy SFDP parsing, so newer versions with SFDP support will still get the parameters from the SFDP tables. Reported-by: Hartmut Birr Closes: https://lore.kernel.org/r/CALxbwRo_-9CaJmt7r7ELgu+vOcgk=xZcGHobnKf=oT2=u4d4aA@mail.gmail.com/ Fixes: 83e824a4a595 ("mtd: spi-nor: Correct flags for Winbond w25q128") Reviewed-by: Linus Walleij Signed-off-by: Michael Walle Acked-by: Tudor Ambarus Reviewed-by: Esben Haabendal Reviewed-by: Pratyush Yadav Signed-off-by: Pratyush Yadav Link: https://lore.kernel.org/r/20240621120929.2670185-1-mwalle@kernel.org Link: https://lore.kernel.org/r/20240621120929.2670185-1-mwalle@kernel.org [Backported to v6.6 - vastly different due to upstream changes] Reviewed-by: Tudor Ambarus Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/winbond.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/winbond.c b/drivers/mtd/spi-nor/winbond.c index b7c775b615e8..58aba52022bf 100644 --- a/drivers/mtd/spi-nor/winbond.c +++ b/drivers/mtd/spi-nor/winbond.c @@ -120,9 +120,10 @@ static const struct flash_info winbond_nor_parts[] = { NO_SFDP_FLAGS(SECT_4K) }, { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16) NO_SFDP_FLAGS(SECT_4K) }, - { "w25q128", INFO(0xef4018, 0, 0, 0) - PARSE_SFDP - FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, + { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256) + FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) + NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | + SPI_NOR_QUAD_READ) }, { "w25q256", INFO(0xef4019, 0, 64 * 1024, 512) NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) .fixups = &w25q256_fixups }, From 5e84eda48ffb2363437db44bbd0235594f8a58f9 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 May 2024 20:15:21 +0530 Subject: [PATCH 031/174] drm/amd/display: Add null checks for 'stream' and 'plane' before dereferencing commit 15c2990e0f0108b9c3752d7072a97d45d4283aea upstream. This commit adds null checks for the 'stream' and 'plane' variables in the dcn30_apply_idle_power_optimizations function. These variables were previously assumed to be null at line 922, but they were used later in the code without checking if they were null. This could potentially lead to a null pointer dereference, which would cause a crash. The null checks ensure that 'stream' and 'plane' are not null before they are used, preventing potential crashes. Fixes the below static smatch checker: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:938 dcn30_apply_idle_power_optimizations() error: we previously assumed 'stream' could be null (see line 922) drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn30/dcn30_hwseq.c:940 dcn30_apply_idle_power_optimizations() error: we previously assumed 'plane' could be null (see line 922) Cc: Tom Chung Cc: Nicholas Kazlauskas Cc: Bhawanpreet Lakha Cc: Rodrigo Siqueira Cc: Roman Li Cc: Hersen Wu Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher [Xiangyu: Modified file path to backport this commit] Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 407f7889e8fd..7a643690fdc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -762,6 +762,9 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) stream = dc->current_state->streams[0]; plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL); + if (!stream || !plane) + return false; + if (stream && plane) { cursor_cache_enable = stream->cursor_position.enable && plane->address.grph.cursor_cache_addr.quad_part; From 87de0a741ef6d93fcb99983138a0d89a546a043c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 15 Mar 2024 21:25:25 -0600 Subject: [PATCH 032/174] drm/amd/display: Skip on writeback when it's not applicable commit ecedd99a9369fb5cde601ae9abd58bca2739f1ae upstream. [WHY] dynamic memory safety error detector (KASAN) catches and generates error messages "BUG: KASAN: slab-out-of-bounds" as writeback connector does not support certain features which are not initialized. [HOW] Skip them when connector type is DRM_MODE_CONNECTOR_WRITEBACK. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3199 Reviewed-by: Harry Wentland Reviewed-by: Rodrigo Siqueira Acked-by: Roman Li Signed-off-by: Alex Hung Signed-off-by: Alex Deucher Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8f7130f7d8c6..8dc0f70df24f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2990,6 +2990,10 @@ static int dm_resume(void *handle) /* Do mst topology probing after resuming cached state*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type != dc_connection_mst_branch || aconnector->mst_port) @@ -5722,6 +5726,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, &aconnector->base.probed_modes : &aconnector->base.modes; + if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return NULL; + if (aconnector->freesync_vid_base.clock != 0) return &aconnector->freesync_vid_base; @@ -8242,6 +8249,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev, continue; notify: + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); mutex_lock(&adev->dm.audio_lock); From 23c4cb8a56978e5b1baa171d42e616e316c2039d Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Fri, 11 Oct 2024 02:46:19 +0900 Subject: [PATCH 033/174] vt: prevent kernel-infoleak in con_font_get() commit f956052e00de211b5c9ebaa1958366c23f82ee9e upstream. font.data may not initialize all memory spaces depending on the implementation of vc->vc_sw->con_font_get. This may cause info-leak, so to prevent this, it is safest to modify it to initialize the allocated memory space to 0, and it generally does not affect the overall performance of the system. Cc: stable@vger.kernel.org Reported-by: syzbot+955da2d57931604ee691@syzkaller.appspotmail.com Fixes: 05e2600cb0a4 ("VT: Bump font size limitation to 64x128 pixels") Signed-off-by: Jeongjun Park Link: https://lore.kernel.org/r/20241010174619.59662-1-aha310510@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index e2f9348725ff..e1b40a384868 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4603,7 +4603,7 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) int c; if (op->data) { - font.data = kmalloc(max_font_size, GFP_KERNEL); + font.data = kzalloc(max_font_size, GFP_KERNEL); if (!font.data) return -ENOMEM; } else From b3660228db5a7709e8f230482d7c9f93c19d02d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Mar 2023 14:03:27 -0800 Subject: [PATCH 034/174] mm: avoid gcc complaint about pointer casting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e77d587a2c04e82c6a0dffa4a32c874a4029385d upstream. The migration code ends up temporarily stashing information of the wrong type in unused fields of the newly allocated destination folio. That all works fine, but gcc does complain about the pointer type mis-use: mm/migrate.c: In function ‘__migrate_folio_extract’: mm/migrate.c:1050:20: note: randstruct: casting between randomized structure pointer types (ssa): ‘struct anon_vma’ and ‘struct address_space’ 1050 | *anon_vmap = (void *)dst->mapping; | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ and gcc is actually right to complain since it really doesn't understand that this is a very temporary special case where this is ok. This could be fixed in different ways by just obfuscating the assignment sufficiently that gcc doesn't see what is going on, but the truly "proper C" way to do this is by explicitly using a union. Using unions for type conversions like this is normally hugely ugly and syntactically nasty, but this really is one of the few cases where we want to make it clear that we're not doing type conversion, we're really re-using the value bit-for-bit just using another type. IOW, this should not become a common pattern, but in this one case using that odd union is probably the best way to document to the compiler what is conceptually going on here. [ Side note: there are valid cases where we convert pointers to other pointer types, notably the whole "folio vs page" situation, where the types actually have fundamental commonalities. The fact that the gcc note is limited to just randomized structures means that we don't see equivalent warnings for those cases, but it migth also mean that we miss other cases where we do play these kinds of dodgy games, and this kind of explicit conversion might be a good idea. ] I verified that at least for an allmodconfig build on x86-64, this generates the exact same code, apart from line numbers and assembler comment changes. Fixes: 64c8902ed441 ("migrate_pages: split unmap_and_move() to _unmap() and _move()") Cc: Huang, Ying Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9ff5d77b61a3..8393f4a935a9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1017,11 +1017,16 @@ out: * destination folio. This is safe because nobody is using them * except us. */ +union migration_ptr { + struct anon_vma *anon_vma; + struct address_space *mapping; +}; static void __migrate_folio_record(struct folio *dst, unsigned long page_was_mapped, struct anon_vma *anon_vma) { - dst->mapping = (void *)anon_vma; + union migration_ptr ptr = { .anon_vma = anon_vma }; + dst->mapping = ptr.mapping; dst->private = (void *)page_was_mapped; } @@ -1029,7 +1034,8 @@ static void __migrate_folio_extract(struct folio *dst, int *page_was_mappedp, struct anon_vma **anon_vmap) { - *anon_vmap = (void *)dst->mapping; + union migration_ptr ptr = { .mapping = dst->mapping }; + *anon_vmap = ptr.anon_vma; *page_was_mappedp = (unsigned long)dst->private; dst->mapping = NULL; dst->private = NULL; From 7dcd6204160ee147fa902d30657c4fc7e16d5a4e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 17 Apr 2023 07:59:29 +0800 Subject: [PATCH 035/174] migrate_pages_batch: fix statistics for longterm pin retry commit 851ae6424697d1c4f085cb878c88168923ebcad1 upstream. In commit fd4a7ac32918 ("mm: migrate: try again if THP split is failed due to page refcnt"), if the THP splitting fails due to page reference count, we will retry to improve migration successful rate. But the failed splitting is counted as migration failure and migration retry, which will cause duplicated failure counting. So, in this patch, this is fixed via undoing the failure counting if we decide to retry. The patch is tested via failure injection. Link: https://lkml.kernel.org/r/20230416235929.1040194-1-ying.huang@intel.com Fixes: fd4a7ac32918 ("mm: migrate: try again if THP split is failed due to page refcnt") Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Cc: Alistair Popple Cc: David Hildenbrand Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index 8393f4a935a9..209078154a46 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1700,6 +1700,9 @@ split_folio_migration: large_retry++; thp_retry += is_thp; nr_retry_pages += nr_pages; + /* Undo duplicated failure counting. */ + nr_large_failed--; + stats->nr_thp_failed -= is_thp; break; } } From d7039b844a1c0817aef0c66c2f87cd6b29fbd29e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 8 Nov 2024 16:26:48 +0100 Subject: [PATCH 036/174] Linux 6.1.116 Link: https://lore.kernel.org/r/20241106120306.038154857@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Peter Schneider Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Sven Joachim Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: Hardik Garg Tested-by: Yann Sionneau Link: https://lore.kernel.org/r/20241107064547.006019150@linuxfoundation.org Tested-by: Luna Jernberg Tested-by: Pavel Machek (CIP) Tested-by: Jon Hunter Tested-by: Peter Schneider Tested-by: Mark Brown Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: Hardik Garg Tested-by: Linux Kernel Functional Testing Tested-by: kernelci.org bot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1982ced2f43a..8f1f0f4a96b4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 115 +SUBLEVEL = 116 EXTRAVERSION = NAME = Curry Ramen From d7b0f08fd642b26938872df9876b57959db1172a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Sep 2024 15:48:40 +0200 Subject: [PATCH 037/174] arm64: dts: rockchip: Fix rt5651 compatible value on rk3399-eaidk-610 [ Upstream commit 2f39bba3b4f037d6c3c9174eed5befcef1c79abb ] There are no DT bindings and driver support for a "rockchip,rt5651" codec. Replace "rockchip,rt5651" by "realtek,rt5651", which matches the "simple-audio-card,name" property in the "rt5651-sound" node. Fixes: 904f983256fdd24b ("arm64: dts: rockchip: Add dts for a rk3399 based board EAIDK-610") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/a9877b8b1bd0de279d2ec8294d5be14587203a82.1727358193.git.geert+renesas@glider.be Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts index d1f343345f67..1f156f5a8666 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts @@ -541,7 +541,7 @@ status = "okay"; rt5651: audio-codec@1a { - compatible = "rockchip,rt5651"; + compatible = "realtek,rt5651"; reg = <0x1a>; clocks = <&cru SCLK_I2S_8CH_OUT>; clock-names = "mclk"; From 89b30d16db326067756246772cefb5360a3a2cd9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Sep 2024 15:48:41 +0200 Subject: [PATCH 038/174] arm64: dts: rockchip: Fix rt5651 compatible value on rk3399-sapphire-excavator [ Upstream commit 577b5761679da90e691acc939ebbe7879fff5f31 ] There are no DT bindings and driver support for a "rockchip,rt5651" codec. Replace "rockchip,rt5651" by "realtek,rt5651", which matches the "simple-audio-card,name" property in the "rt5651-sound" node. Fixes: 0a3c78e251b3a266 ("arm64: dts: rockchip: Add support for rk3399 excavator main board") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/abc6c89811b3911785601d6d590483eacb145102.1727358193.git.geert+renesas@glider.be Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index dbec2b7173a0..31ea3d0182c0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -163,7 +163,7 @@ status = "okay"; rt5651: rt5651@1a { - compatible = "rockchip,rt5651"; + compatible = "realtek,rt5651"; reg = <0x1a>; clocks = <&cru SCLK_I2S_8CH_OUT>; clock-names = "mclk"; From 8db0adaefe909b6c50b9ca3dd8729af153b30991 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 8 Oct 2024 13:15:37 +0200 Subject: [PATCH 039/174] arm64: dts: rockchip: Remove hdmi's 2nd interrupt on rk3328 [ Upstream commit de50a7e3681771c6b990238af82bf1dea9b11b21 ] The "synopsys,dw-hdmi.yaml" binding specifies that the interrupts property of the hdmi node has 'maxItems: 1', so the hdmi node in rk3328.dtsi having 2 is incorrect. Paragraph 1.3 ("System Interrupt connection") of the RK3328 TRM v1.1 page 16 and 17 define the following hdmi related interrupts: - 67 hdmi_intr - 103 hdmi_intr_wakeup The difference of 32 is due to a different base used in the TRM. The RK3399 (which uses the same binding) has '23: hdmi_irq' and '24: hdmi_wakeup_irq' according to its TRM (page 19). The RK3568 (also same binding) has '76: hdmi_wakeup' and '77: hdmi' according to page 17 of its TRM. In both cases the non-wakeup IRQ was used, so use that too for rk3328. Helped-by: Heiko Stuebner Fixes: 725e351c265a ("arm64: dts: rockchip: add rk3328 display nodes") Signed-off-by: Diederik de Haas Link: https://lore.kernel.org/r/20241008113344.23957-3-didi.debian@cknow.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 5adb2fbc2aaf..75ea512e9724 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -724,8 +724,7 @@ compatible = "rockchip,rk3328-dw-hdmi"; reg = <0x0 0xff3c0000 0x0 0x20000>; reg-io-width = <4>; - interrupts = , - ; + interrupts = ; clocks = <&cru PCLK_HDMI>, <&cru SCLK_HDMI_SFC>, <&cru SCLK_RTC32K>; From d6477a9858900f889298f800748ba236852fbff4 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 8 Oct 2024 13:15:38 +0200 Subject: [PATCH 040/174] arm64: dts: rockchip: Fix wakeup prop names on PineNote BT node [ Upstream commit 87299d6ee95a37d2d576dd8077ea6860f77ad8e2 ] The "brcm,bluetooth.yaml" binding has 'device-wakeup-gpios' and 'host-wakeup-gpios' property names, not '*-wake-gpios'. Fix the incorrect property names. Note that the "realtek,bluetooth.yaml" binding does use the '*-wake-gpios' property names. Fixes: d449121e5e8a ("arm64: dts: rockchip: Add Pine64 PineNote board") Signed-off-by: Diederik de Haas Link: https://lore.kernel.org/r/20241008113344.23957-4-didi.debian@cknow.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 8d61f824c12d..52ab14308a5c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -685,8 +685,8 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wake-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; - host-wake-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio0 RK_PC2 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio0 RK_PC3 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio0 RK_PC4 GPIO_ACTIVE_LOW>; pinctrl-0 = <&bt_enable_h>, <&bt_host_wake_l>, <&bt_wake_h>; pinctrl-names = "default"; From 72b96b794d609d934a72e0b98009b387a3a08c21 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:30 +0200 Subject: [PATCH 041/174] arm64: dts: rockchip: Fix bluetooth properties on Rock960 boards [ Upstream commit ea74528aaea5a1dfc8e3de09ef2af37530eca526 ] The expected clock-name is different, and extclk also is deprecated in favor of txco for clocks that are not crystals. So fix it to match the binding. Fixes: c72235c288c8 ("arm64: dts: rockchip: Add on-board WiFi/BT support for Rock960 boards") Cc: Manivannan Sadhasivam Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-5-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi index 94e39ed63397..d38b25a0c77f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi @@ -575,7 +575,7 @@ bluetooth { compatible = "brcm,bcm43438-bt"; clocks = <&rk808 1>; - clock-names = "ext_clock"; + clock-names = "txco"; device-wakeup-gpios = <&gpio2 RK_PD3 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>; From 3746e8b25ac3a0b1f27c75d4d9505482c18ee270 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:32 +0200 Subject: [PATCH 042/174] arm64: dts: rockchip: Remove #cooling-cells from fan on Theobroma lion [ Upstream commit 5ed96580568c4f79a0aff11a67f10b3e9229ba86 ] All Theobroma boards use a ti,amc6821 as fan controller. It normally runs in an automatically controlled way and while it may be possible to use it as part of a dt-based thermal management, this is not yet specified in the binding, nor implemented in any kernel. Newer boards already don't contain that #cooling-cells property, but older ones do. So remove them for now, they can be re-added if thermal integration gets implemented in the future. There are two further occurences in v6.12-rc in px30-ringneck and rk3399-puma, but those already get removed by the i2c-mux conversion scheduled for 6.13 . As the undocumented property is in the kernel so long, I opted for not causing extra merge conflicts between 6.12 and 6.13 Fixes: d99a02bcfa81 ("arm64: dts: rockchip: add RK3368-uQ7 (Lion) SoM") Cc: Quentin Schulz Cc: Klaus Goger Reviewed-by: Quentin Schulz Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-7-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi index 5753e57fd716..e8859cfd2d39 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi @@ -60,7 +60,6 @@ fan: fan@18 { compatible = "ti,amc6821"; reg = <0x18>; - #cooling-cells = <2>; }; rtc_twi: rtc@6f { From 7219ff9791ac8a2be514a5d33b5eef44fdda3747 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:33 +0200 Subject: [PATCH 043/174] arm64: dts: rockchip: Fix LED triggers on rk3308-roc-cc [ Upstream commit 3a53a7187f41ec3db12cf4c2cb0db4ba87c2f3a1 ] There are two LEDs on the board, power and user events. Currently both are assigned undocumented IR(-remote) triggers that are probably only part of the vendor-kernel. To make dtbs check happier, assign the power-led to a generic default-on trigger and the user led to the documented rc-feedback trigger that should mostly match its current usage. Fixes: 4403e1237be3 ("arm64: dts: rockchip: Add devicetree for board roc-rk3308-cc") Cc: Andy Yan Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-8-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index 7ea48167747c..70aeca428b38 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -36,14 +36,14 @@ power_led: led-0 { label = "firefly:red:power"; - linux,default-trigger = "ir-power-click"; + linux,default-trigger = "default-on"; default-state = "on"; gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; }; user_led: led-1 { label = "firefly:blue:user"; - linux,default-trigger = "ir-user-click"; + linux,default-trigger = "rc-feedback"; default-state = "off"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_HIGH>; }; From ed5268f3e8eab5a6b16c92d39234f377a4b41a4a Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 21 Jul 2023 13:10:37 +0200 Subject: [PATCH 044/174] arm64: dts: imx8qm: Fix VPU core alias name [ Upstream commit f6038de293f28503eccbfcfa84d39faf56d09150 ] Alias names use dashes instead of underscores, fix this. Silences also dtbs_check warning: imx8qxp-tqma8xqp-mba8xx.dtb: aliases: 'vpu_core0', 'vpu_core1', 'vpu_core2' do not match any of the regexes: '^[a-z][a-z0-9\\-]*$', 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/aliases.yaml# Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Stable-dep-of: eed2d8e8d005 ("arm64: dts: imx8-ss-vpu: Fix imx8qm VPU IRQs") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8qxp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi index f4ea18bb95ab..dce699dffb9b 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi @@ -46,9 +46,9 @@ serial1 = &lpuart1; serial2 = &lpuart2; serial3 = &lpuart3; - vpu_core0 = &vpu_core0; - vpu_core1 = &vpu_core1; - vpu_core2 = &vpu_core2; + vpu-core0 = &vpu_core0; + vpu-core1 = &vpu_core1; + vpu-core2 = &vpu_core2; }; cpus { From 272abcefdbfdfd2e78e713959391e1c8939e124d Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 14 Dec 2023 14:20:00 +0100 Subject: [PATCH 045/174] arm64: dts: imx8qxp: Add VPU subsystem file [ Upstream commit 6bcd8b2fa2a9826fb6a849a9bfd7bdef145cabb6 ] imx8qxp re-uses imx8qm VPU subsystem file, but it has different base addresses. Also imx8qxp has only two VPU cores, delete vpu_vore2 and mu2_m0 accordingly. Signed-off-by: Alexander Stein Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Stable-dep-of: eed2d8e8d005 ("arm64: dts: imx8-ss-vpu: Fix imx8qm VPU IRQs") Signed-off-by: Sasha Levin --- .../boot/dts/freescale/imx8qxp-ss-vpu.dtsi | 17 +++++++++++++++++ arch/arm64/boot/dts/freescale/imx8qxp.dtsi | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi new file mode 100644 index 000000000000..7894a3ab26d6 --- /dev/null +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR X11) +/* + * Copyright 2023 TQ-Systems GmbH , + * D-82229 Seefeld, Germany. + * Author: Alexander Stein + */ + +&vpu_core0 { + reg = <0x2d040000 0x10000>; +}; + +&vpu_core1 { + reg = <0x2d050000 0x10000>; +}; + +/delete-node/ &mu2_m0; +/delete-node/ &vpu_core2; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi index dce699dffb9b..bec66bb24082 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi @@ -48,7 +48,6 @@ serial3 = &lpuart3; vpu-core0 = &vpu_core0; vpu-core1 = &vpu_core1; - vpu-core2 = &vpu_core2; }; cpus { @@ -316,6 +315,7 @@ }; #include "imx8qxp-ss-img.dtsi" +#include "imx8qxp-ss-vpu.dtsi" #include "imx8qxp-ss-adma.dtsi" #include "imx8qxp-ss-conn.dtsi" #include "imx8qxp-ss-lsio.dtsi" From 65af08b542030c81aa39cc40d939d25b120b5f22 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 4 Sep 2024 13:41:03 +0200 Subject: [PATCH 046/174] arm64: dts: imx8-ss-vpu: Fix imx8qm VPU IRQs [ Upstream commit eed2d8e8d0051a6551e4dffba99e16eb88c676ac ] imx8-ss-vpu only contained imx8qxp IRQ numbers, only mu2_m0 uses the correct imx8qm IRQ number, as imx8qxp lacks this MU. Fix this by providing imx8qm IRQ numbers in the main imx8-ss-vpu.dtsi and override the IRQ numbers in SoC-specific imx8qxp-ss-vpu.dtsi, similar to reg property for VPU core devices. Fixes: 0d9968d98467d ("arm64: dts: freescale: imx8q: add imx vpu codec entries") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi | 4 ++-- arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi index c6540768bdb9..87211c18d65a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-vpu.dtsi @@ -15,7 +15,7 @@ vpu: vpu@2c000000 { mu_m0: mailbox@2d000000 { compatible = "fsl,imx6sx-mu"; reg = <0x2d000000 0x20000>; - interrupts = ; + interrupts = ; #mbox-cells = <2>; power-domains = <&pd IMX_SC_R_VPU_MU_0>; status = "disabled"; @@ -24,7 +24,7 @@ vpu: vpu@2c000000 { mu1_m0: mailbox@2d020000 { compatible = "fsl,imx6sx-mu"; reg = <0x2d020000 0x20000>; - interrupts = ; + interrupts = ; #mbox-cells = <2>; power-domains = <&pd IMX_SC_R_VPU_MU_1>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi index 7894a3ab26d6..f81937b5fb72 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-vpu.dtsi @@ -5,6 +5,14 @@ * Author: Alexander Stein */ +&mu_m0 { + interrupts = ; +}; + +&mu1_m0 { + interrupts = ; +}; + &vpu_core0 { reg = <0x2d040000 0x10000>; }; From a45a7930f4976c5a6b05b98f064d58b0f06512bc Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sat, 12 Oct 2024 10:52:21 +0800 Subject: [PATCH 047/174] arm64: dts: imx8mp: correct sdhc ipg clk [ Upstream commit eab6ba2aa3bbaf598a66e31f709bf84b7bb7dc8a ] The ipg clk for sdhc sources from IPG_CLK_ROOT per i.MX 8M Plus Applications Processor Reference Manual, Table 5-2. System Clocks. Fixes: 6d9b8d20431f ("arm64: dts: freescale: Add i.MX8MP dtsi support") Signed-off-by: Peng Fan Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 8c34b3e12a66..86af7115ac60 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -934,7 +934,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = ; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -948,7 +948,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = ; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -962,7 +962,7 @@ compatible = "fsl,imx8mp-usdhc", "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = ; - clocks = <&clk IMX8MP_CLK_DUMMY>, + clocks = <&clk IMX8MP_CLK_IPG_ROOT>, <&clk IMX8MP_CLK_NAND_USDHC_BUS>, <&clk IMX8MP_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; From f7539956d70d3870c85673beb20bc769008ef8a0 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:37 +0200 Subject: [PATCH 048/174] ARM: dts: rockchip: fix rk3036 acodec node [ Upstream commit c7206853cd7d31c52575fb1dc7616b4398f3bc8f ] The acodec node is not conformant to the binding. Set the correct nodename, use the correct compatible, add the needed #sound-dai-cells and sort the rockchip,grf below clocks properties as expected. Fixes: faea098e1808 ("ARM: dts: rockchip: add core rk3036 dtsi") Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-12-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3036.dtsi | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index c420c7c642cb..e6bb1d7a2b4e 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -382,12 +382,13 @@ }; }; - acodec: acodec-ana@20030000 { - compatible = "rk3036-codec"; + acodec: audio-codec@20030000 { + compatible = "rockchip,rk3036-codec"; reg = <0x20030000 0x4000>; - rockchip,grf = <&grf>; clock-names = "acodec_pclk"; clocks = <&cru PCLK_ACODEC>; + rockchip,grf = <&grf>; + #sound-dai-cells = <0>; status = "disabled"; }; From 44c3b97a1c50fe6e969dd1e0fca396693177275a Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:38 +0200 Subject: [PATCH 049/174] ARM: dts: rockchip: drop grf reference from rk3036 hdmi [ Upstream commit 1580ccb6ed9dc76b8ff3e2d8912e8215c8b0fa6d ] Neither the binding nor the driver implementation specify/use the grf reference provided in the rk3036. And neither does the newer rk3128 user of the hdmi controller. So drop the rockchip,grf property. Fixes: b7217cf19c63 ("ARM: dts: rockchip: add hdmi device node for rk3036") Cc: Caesar Wang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-13-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3036.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index e6bb1d7a2b4e..4e208528eebf 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -398,7 +398,6 @@ interrupts = ; clocks = <&cru PCLK_HDMI>; clock-names = "pclk"; - rockchip,grf = <&grf>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_ctl>; status = "disabled"; From 89e601bd7316ad929c70f9b7e25cb4cd43d2a22c Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:39 +0200 Subject: [PATCH 050/174] ARM: dts: rockchip: Fix the spi controller on rk3036 [ Upstream commit 8bade1ad1f0821aef31f6a8fb1027ae292566d85 ] Compatible and clock names did not match the existing binding. So set the correct values and re-order+rename the clocks. It looks like no rk3036 board did use the spi controller so far, so this was never detected on a running device yet. Fixes: f629fcfab2cd ("ARM: dts: rockchip: support the spi for rk3036") Cc: Caesar Wang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-14-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3036.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 4e208528eebf..5bdbadd879fe 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -550,11 +550,11 @@ }; spi: spi@20074000 { - compatible = "rockchip,rockchip-spi"; + compatible = "rockchip,rk3036-spi"; reg = <0x20074000 0x1000>; interrupts = ; - clocks = <&cru PCLK_SPI>, <&cru SCLK_SPI>; - clock-names = "apb-pclk","spi_pclk"; + clocks = <&cru SCLK_SPI>, <&cru PCLK_SPI>; + clock-names = "spiclk", "apb_pclk"; dmas = <&pdma 8>, <&pdma 9>; dma-names = "tx", "rx"; pinctrl-names = "default"; From 5d739ad16ca3e2c183cb2b829da084692845176e Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 8 Oct 2024 22:39:40 +0200 Subject: [PATCH 051/174] ARM: dts: rockchip: Fix the realtek audio codec on rk3036-kylin [ Upstream commit 77a9a7f2d3b94d29d13d71b851114d593a2147cf ] Both the node name as well as the compatible were not named according to the binding expectations, fix that. Fixes: 47bf3a5c9e2a ("ARM: dts: rockchip: add the sound setup for rk3036-kylin board") Cc: Caesar Wang Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241008203940.2573684-15-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rk3036-kylin.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3036-kylin.dts b/arch/arm/boot/dts/rk3036-kylin.dts index 67e1e04139e7..43926d0962bb 100644 --- a/arch/arm/boot/dts/rk3036-kylin.dts +++ b/arch/arm/boot/dts/rk3036-kylin.dts @@ -304,8 +304,8 @@ &i2c2 { status = "okay"; - rt5616: rt5616@1b { - compatible = "rt5616"; + rt5616: audio-codec@1b { + compatible = "realtek,rt5616"; reg = <0x1b>; clocks = <&cru SCLK_I2S_OUT>; clock-names = "mclk"; From 1884ab3d22536a5c14b17c78c2ce76d1734e8b0b Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 29 Oct 2024 15:44:35 +0100 Subject: [PATCH 052/174] HID: core: zero-initialize the report buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 177f25d1292c7e16e1199b39c85480f7f8815552 ] Since the report buffer is used by all kinds of drivers in various ways, let's zero-initialize it during allocation to make sure that it can't be ever used to leak kernel memory via specially-crafted report. Fixes: 27ce405039bf ("HID: fix data access in implement()") Reported-by: Benoît Sevens Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e2e52aa0eeba..b5887658c6af 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1878,7 +1878,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) u32 len = hid_report_len(report) + 7; - return kmalloc(len, flags); + return kzalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); From a50863dd1f92d43c975ab2ecc3476617fe98a66e Mon Sep 17 00:00:00 2001 From: Corey Hickey Date: Mon, 28 Oct 2024 11:02:41 -0700 Subject: [PATCH 053/174] platform/x86/amd/pmc: Detect when STB is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bceec87a73804bb4c33b9a6c96e2d27cd893a801 ] Loading the amd_pmc module as: amd_pmc enable_stb=1 ...can result in the following messages in the kernel ring buffer: amd_pmc AMDI0009:00: SMU cmd failed. err: 0xff ioremap on RAM at 0x0000000000000000 - 0x0000000000ffffff WARNING: CPU: 10 PID: 2151 at arch/x86/mm/ioremap.c:217 __ioremap_caller+0x2cd/0x340 Further debugging reveals that this occurs when the requests for S2D_PHYS_ADDR_LOW and S2D_PHYS_ADDR_HIGH return a value of 0, indicating that the STB is inaccessible. To prevent the ioremap warning and provide clarity to the user, handle the invalid address and display an error message. Link: https://lore.kernel.org/platform-driver-x86/c588ff5d-3e04-4549-9a86-284b9b4419ba@amd.com Fixes: 3d7d407dfb05 ("platform/x86: amd-pmc: Add support for AMD Spill to DRAM STB feature") Acked-by: Shyam Sundar S K Signed-off-by: Corey Hickey Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20241028180241.1341624-1-bugfood-ml@fatooh.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/amd/pmc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index eb9fc6cb13e3..f237c1ea8d35 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -878,6 +878,11 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) amd_pmc_send_cmd(dev, S2D_PHYS_ADDR_LOW, &phys_addr_low, STB_SPILL_TO_DRAM, 1); amd_pmc_send_cmd(dev, S2D_PHYS_ADDR_HIGH, &phys_addr_hi, STB_SPILL_TO_DRAM, 1); + if (!phys_addr_hi && !phys_addr_low) { + dev_err(dev->dev, "STB is not enabled on the system; disable enable_stb or contact system vendor\n"); + return -EINVAL; + } + stb_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low); /* Clear msg_port for other SMU operation */ From f22232160ee843b6074c51ef6b4693c6197b9eb2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 Oct 2024 16:28:06 +1100 Subject: [PATCH 054/174] sunrpc: handle -ENOTCONN in xs_tcp_setup_socket() [ Upstream commit 10f0740234f0b157b41bdc7e9c3555a9b86c1599 ] xs_tcp_finish_connecting() can return -ENOTCONN but the switch statement in xs_tcp_setup_socket() treats that as an unhandled error. If we treat it as a known error it would propagate back to call_connect_status() which does handle that error code. This appears to be the intention of the commit (given below) which added -ENOTCONN as a return status for xs_tcp_finish_connecting(). So add -ENOTCONN to the switch statement as an error to pass through to the caller. Link: https://bugzilla.suse.com/show_bug.cgi?id=1231050 Link: https://access.redhat.com/discussions/3434091 Fixes: 01d37c428ae0 ("SUNRPC: xprt_connect() don't abort the task if the transport isn't bound") Signed-off-by: NeilBrown Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 02f651f85e73..190dae11f634 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2351,6 +2351,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: + case -ENOTCONN: break; default: printk("%s: connect returned unhandled error %d\n", From 9a65be8111c3f9cd009989660100a6dd4cd2c920 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 4 Oct 2024 11:07:23 +1000 Subject: [PATCH 055/174] NFSv3: only use NFS timeout for MOUNT when protocols are compatible [ Upstream commit 6e2a10343ecb71c4457bc16be05758f9c7aae7d9 ] If a timeout is specified in the mount options, it currently applies to both the NFS protocol and (with v3) the MOUNT protocol. This is sensible when they both use the same underlying protocol, or those protocols are compatible w.r.t timeouts as RDMA and TCP are. However if, for example, NFS is using TCP and MOUNT is using UDP then using the same timeout doesn't make much sense. If you mount -o vers=3,proto=tcp,mountproto=udp,timeo=600,retrans=5 \ server:/path /mountpoint then the timeo=600 which was intended for the NFS/TCP request will apply to the MOUNT/UDP requests with the result that there will only be one request sent (because UDP has a maximum timeout of 60 seconds). This is not what a reasonable person might expect. This patch disables the sharing of timeout information in cases where the underlying protocols are not compatible. Fixes: c9301cb35b59 ("nfs: hornor timeo and retrans option when mounting NFSv3") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f7b4df29ac5f..3dffeb1d17b9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -866,7 +866,15 @@ static int nfs_request_mount(struct fs_context *fc, * Now ask the mount server to map our export path * to a file handle. */ - status = nfs_mount(&request, ctx->timeo, ctx->retrans); + if ((request.protocol == XPRT_TRANSPORT_UDP) == + !(ctx->flags & NFS_MOUNT_TCP)) + /* + * NFS protocol and mount protocol are both UDP or neither UDP + * so timeouts are compatible. Use NFS timeouts for MOUNT + */ + status = nfs_mount(&request, ctx->timeo, ctx->retrans); + else + status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS); if (status != 0) { dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", request.hostname, status); From ba5634feb29f059eb71b12a3113a07338137d5cf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Mar 2023 09:27:04 +1100 Subject: [PATCH 056/174] NFSv3: handle out-of-order write replies. [ Upstream commit 3db63daabe210af32a09533fe7d8d47c711a103c ] NFSv3 includes pre/post wcc attributes which allow the client to determine if all changes to the file have been made by the client itself, or if any might have been made by some other client. If there are gaps in the pre/post ctime sequence it must be assumed that some other client changed the file in that gap and the local cache must be suspect. The next time the file is opened the cache should be invalidated. Since Commit 1c341b777501 ("NFS: Add deferred cache invalidation for close-to-open consistency violations") in linux 5.3 the Linux client has been triggering this invalidation. The chunk in nfs_update_inode() in particularly triggers. Unfortunately Linux NFS assumes that all replies will be processed in the order sent, and will arrive in the order processed. This is not true in general. Consequently Linux NFS might ignore the wcc info in a WRITE reply because the reply is in response to a WRITE that was sent before some other request for which a reply has already been seen. This is detected by Linux using the gencount tests in nfs_inode_attr_cmp(). Also, when the gencount tests pass it is still possible that the request were processed on the server in a different order, and a gap seen in the ctime sequence might be filled in by a subsequent reply, so gaps should not immediately trigger delayed invalidation. The net result is that writing to a server and then reading the file back can result in going to the server for the read rather than serving it from cache - all because a couple of replies arrived out-of-order. This is a performance regression over kernels before 5.3, though the change in 5.3 is a correctness improvement. This has been seen with Linux writing to a Netapp server which occasionally re-orders requests. In testing the majority of requests were in-order, but a few (maybe 2 or three at a time) could be re-ordered. This patch addresses the problem by recording any gaps seen in the pre/post ctime sequence and not triggering invalidation until either there are too many gaps to fit in the table, or until there are no more active writes and the remaining gaps cannot be resolved. We allocate a table of 16 gaps on demand. If the allocation fails we revert to current behaviour which is of little cost as we are unlikely to be able to cache the writes anyway. In the table we store "start->end" pair when iversion is updated and "end<-start" pairs pre/post pairs reported by the server. Usually these exactly cancel out and so nothing is stored. When there are out-of-order replies we do store gaps and these will eventually be cancelled against later replies when this client is the only writer. If the final write is out-of-order there may be one gap remaining when the file is closed. This will be noticed and if there is precisely on gap and if the iversion can be advanced to match it, then we do so. This patch makes no attempt to handle directories correctly. The same problem potentially exists in the out-of-order replies to create/unlink requests can cause future lookup requires to be sent to the server unnecessarily. A similar scheme using the same primitives could be used to notice and handle out-of-order replies. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Stable-dep-of: 867da60d463b ("nfs: avoid i_lock contention in nfs_clear_invalid_mapping") Signed-off-by: Sasha Levin --- fs/nfs/inode.c | 112 +++++++++++++++++++++++++++++++++++------ include/linux/nfs_fs.h | 47 +++++++++++++++++ 2 files changed, 144 insertions(+), 15 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cf8c3771e4bf..82e846611d5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -208,11 +208,12 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) nfsi->cache_validity |= flags; - if (inode->i_mapping->nrpages == 0) - nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | - NFS_INO_DATA_INVAL_DEFER); - else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; + if (inode->i_mapping->nrpages == 0) { + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(nfsi); + } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_ooo_clear(nfsi); + } trace_nfs_set_cache_invalid(inode, 0); } EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); @@ -677,9 +678,10 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ - if (offset == 0) - NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA | - NFS_INO_DATA_INVAL_DEFER); + if (offset == 0) { + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(NFS_I(inode)); + } NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); @@ -1099,7 +1101,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); if (list_empty(&nfsi->open_files) && - (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfs_ooo_test(nfsi)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); @@ -1342,8 +1344,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); - nfsi->cache_validity &= - ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(nfsi); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); @@ -1805,6 +1807,66 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, return 0; } +static void nfs_ooo_merge(struct nfs_inode *nfsi, + u64 start, u64 end) +{ + int i, cnt; + + if (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) + /* No point merging anything */ + return; + + if (!nfsi->ooo) { + nfsi->ooo = kmalloc(sizeof(*nfsi->ooo), GFP_ATOMIC); + if (!nfsi->ooo) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + return; + } + nfsi->ooo->cnt = 0; + } + + /* add this range, merging if possible */ + cnt = nfsi->ooo->cnt; + for (i = 0; i < cnt; i++) { + if (end == nfsi->ooo->gap[i].start) + end = nfsi->ooo->gap[i].end; + else if (start == nfsi->ooo->gap[i].end) + start = nfsi->ooo->gap[i].start; + else + continue; + /* Remove 'i' from table and loop to insert the new range */ + cnt -= 1; + nfsi->ooo->gap[i] = nfsi->ooo->gap[cnt]; + i = -1; + } + if (start != end) { + if (cnt >= ARRAY_SIZE(nfsi->ooo->gap)) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + kfree(nfsi->ooo); + nfsi->ooo = NULL; + return; + } + nfsi->ooo->gap[cnt].start = start; + nfsi->ooo->gap[cnt].end = end; + cnt += 1; + } + nfsi->ooo->cnt = cnt; +} + +static void nfs_ooo_record(struct nfs_inode *nfsi, + struct nfs_fattr *fattr) +{ + /* This reply was out-of-order, so record in the + * pre/post change id, possibly cancelling + * gaps created when iversion was jumpped forward. + */ + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) + nfs_ooo_merge(nfsi, + fattr->change_attr, + fattr->pre_change_attr); +} + static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { @@ -1815,8 +1877,12 @@ static int nfs_refresh_inode_locked(struct inode *inode, if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode)) ret = nfs_update_inode(inode, fattr); - else if (attr_cmp == 0) - ret = nfs_check_inode_attributes(inode, fattr); + else { + nfs_ooo_record(NFS_I(inode), fattr); + + if (attr_cmp == 0) + ret = nfs_check_inode_attributes(inode, fattr); + } trace_nfs_refresh_inode_exit(inode, ret); return ret; @@ -1907,6 +1973,8 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa if (attr_cmp < 0) return 0; if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) { + /* Record the pre/post change info before clearing PRECHANGE */ + nfs_ooo_record(NFS_I(inode), fattr); fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PRESIZE | NFS_ATTR_FATTR_PREMTIME @@ -2061,6 +2129,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (!have_writers && nfsi->ooo && nfsi->ooo->cnt == 1 && + nfsi->ooo->gap[0].end == inode_peek_iversion_raw(inode)) { + /* There is one remaining gap that hasn't been + * merged into iversion - do that now. + */ + inode_set_iversion_raw(inode, nfsi->ooo->gap[0].start); + kfree(nfsi->ooo); + nfsi->ooo = NULL; + } if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { /* Could it be a race with writeback? */ if (!(have_writers || have_delegation)) { @@ -2082,8 +2159,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - } else if (!have_delegation) - nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + } else if (!have_delegation) { + nfs_ooo_record(nfsi, fattr); + nfs_ooo_merge(nfsi, inode_peek_iversion_raw(inode), + fattr->change_attr); + } inode_set_iversion_raw(inode, fattr->change_attr); } } else { @@ -2237,6 +2317,7 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; + nfsi->ooo = NULL; #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ @@ -2249,6 +2330,7 @@ EXPORT_SYMBOL_GPL(nfs_alloc_inode); void nfs_free_inode(struct inode *inode) { + kfree(NFS_I(inode)->ooo); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } EXPORT_SYMBOL_GPL(nfs_free_inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ac7d799d9d38..dc6c3cc2c2b3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -190,6 +190,39 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; + /* Keep track of out-of-order replies. + * The ooo array contains start/end pairs of + * numbers from the changeid sequence when + * the inode's iversion has been updated. + * It also contains end/start pair (i.e. reverse order) + * of sections of the changeid sequence that have + * been seen in replies from the server. + * Normally these should match and when both + * A:B and B:A are found in ooo, they are both removed. + * And if a reply with A:B causes an iversion update + * of A:B, then neither are added. + * When a reply has pre_change that doesn't match + * iversion, then the changeid pair and any consequent + * change in iversion ARE added. Later replies + * might fill in the gaps, or possibly a gap is caused + * by a change from another client. + * When a file or directory is opened, if the ooo table + * is not empty, then we assume the gaps were due to + * another client and we invalidate the cached data. + * + * We can only track a limited number of concurrent gaps. + * Currently that limit is 16. + * We allocate the table on demand. If there is insufficient + * memory, then we probably cannot cache the file anyway + * so there is no loss. + */ + struct { + int cnt; + struct { + u64 start, end; + } gap[16]; + } *ooo; + #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ @@ -616,6 +649,20 @@ nfs_fileid_to_ino_t(u64 fileid) return ino; } +static inline void nfs_ooo_clear(struct nfs_inode *nfsi) +{ + nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; + kfree(nfsi->ooo); + nfsi->ooo = NULL; +} + +static inline bool nfs_ooo_test(struct nfs_inode *nfsi) +{ + return (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) || + (nfsi->ooo && nfsi->ooo->cnt > 0); + +} + #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) /* We need to block new opens while a file is being unlinked. From 99659d23459cb7a887ce0901c37de23117e76a74 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Oct 2024 17:15:41 -0400 Subject: [PATCH 057/174] nfs: avoid i_lock contention in nfs_clear_invalid_mapping [ Upstream commit 867da60d463bb2a3e28c9235c487e56e96cffa00 ] Multi-threaded buffered reads to the same file exposed significant inode spinlock contention in nfs_clear_invalid_mapping(). Eliminate this spinlock contention by checking flags without locking, instead using smp_rmb and smp_load_acquire accordingly, but then take spinlock and double-check these inode flags. Also refactor nfs_set_cache_invalid() slightly to use smp_store_release() to pair with nfs_clear_invalid_mapping()'s smp_load_acquire(). While this fix is beneficial for all multi-threaded buffered reads issued by an NFS client, this issue was identified in the context of surprisingly low LOCALIO performance with 4K multi-threaded buffered read IO. This fix dramatically speeds up LOCALIO performance: before: read: IOPS=1583k, BW=6182MiB/s (6482MB/s)(121GiB/20002msec) after: read: IOPS=3046k, BW=11.6GiB/s (12.5GB/s)(232GiB/20001msec) Fixes: 17dfeb911339 ("NFS: Fix races in nfs_revalidate_mapping") Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/inode.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 82e846611d5e..719448d81aed 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -206,12 +206,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) nfs_fscache_invalidate(inode, 0); flags &= ~NFS_INO_REVAL_FORCED; - nfsi->cache_validity |= flags; + flags |= nfsi->cache_validity; + if (inode->i_mapping->nrpages == 0) + flags &= ~NFS_INO_INVALID_DATA; - if (inode->i_mapping->nrpages == 0) { - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - nfs_ooo_clear(nfsi); - } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */ + smp_store_release(&nfsi->cache_validity, flags); + + if (inode->i_mapping->nrpages == 0 || + nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_ooo_clear(nfsi); } trace_nfs_set_cache_invalid(inode, 0); @@ -1331,6 +1334,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; + smp_rmb(); /* pairs with smp_wmb() below */ + if (test_bit(NFS_INO_INVALIDATING, bitlock)) + continue; + /* pairs with nfs_set_cache_invalid()'s smp_store_release() */ + if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA)) + goto out; + /* Slow-path that double-checks with spinlock held */ spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); From bbad2d5b6c99db468d8f88b6ba6a56ed409b4881 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 8 Oct 2024 12:46:39 +0000 Subject: [PATCH 058/174] security/keys: fix slab-out-of-bounds in key_task_permission [ Upstream commit 4a74da044ec9ec8679e6beccc4306b936b62873f ] KASAN reports an out of bounds read: BUG: KASAN: slab-out-of-bounds in __kuid_val include/linux/uidgid.h:36 BUG: KASAN: slab-out-of-bounds in uid_eq include/linux/uidgid.h:63 [inline] BUG: KASAN: slab-out-of-bounds in key_task_permission+0x394/0x410 security/keys/permission.c:54 Read of size 4 at addr ffff88813c3ab618 by task stress-ng/4362 CPU: 2 PID: 4362 Comm: stress-ng Not tainted 5.10.0-14930-gafbffd6c3ede #15 Call Trace: __dump_stack lib/dump_stack.c:82 [inline] dump_stack+0x107/0x167 lib/dump_stack.c:123 print_address_description.constprop.0+0x19/0x170 mm/kasan/report.c:400 __kasan_report.cold+0x6c/0x84 mm/kasan/report.c:560 kasan_report+0x3a/0x50 mm/kasan/report.c:585 __kuid_val include/linux/uidgid.h:36 [inline] uid_eq include/linux/uidgid.h:63 [inline] key_task_permission+0x394/0x410 security/keys/permission.c:54 search_nested_keyrings+0x90e/0xe90 security/keys/keyring.c:793 This issue was also reported by syzbot. It can be reproduced by following these steps(more details [1]): 1. Obtain more than 32 inputs that have similar hashes, which ends with the pattern '0xxxxxxxe6'. 2. Reboot and add the keys obtained in step 1. The reproducer demonstrates how this issue happened: 1. In the search_nested_keyrings function, when it iterates through the slots in a node(below tag ascend_to_node), if the slot pointer is meta and node->back_pointer != NULL(it means a root), it will proceed to descend_to_node. However, there is an exception. If node is the root, and one of the slots points to a shortcut, it will be treated as a keyring. 2. Whether the ptr is keyring decided by keyring_ptr_is_keyring function. However, KEYRING_PTR_SUBTYPE is 0x2UL, the same as ASSOC_ARRAY_PTR_SUBTYPE_MASK. 3. When 32 keys with the similar hashes are added to the tree, the ROOT has keys with hashes that are not similar (e.g. slot 0) and it splits NODE A without using a shortcut. When NODE A is filled with keys that all hashes are xxe6, the keys are similar, NODE A will split with a shortcut. Finally, it forms the tree as shown below, where slot 6 points to a shortcut. NODE A +------>+---+ ROOT | | 0 | xxe6 +---+ | +---+ xxxx | 0 | shortcut : : xxe6 +---+ | +---+ xxe6 : : | | | xxe6 +---+ | +---+ | 6 |---+ : : xxe6 +---+ +---+ xxe6 : : | f | xxe6 +---+ +---+ xxe6 | f | +---+ 4. As mentioned above, If a slot(slot 6) of the root points to a shortcut, it may be mistakenly transferred to a key*, leading to a read out-of-bounds read. To fix this issue, one should jump to descend_to_node if the ptr is a shortcut, regardless of whether the node is root or not. [1] https://lore.kernel.org/linux-kernel/1cfa878e-8c7b-4570-8606-21daf5e13ce7@huaweicloud.com/ [jarkko: tweaked the commit message a bit to have an appropriate closes tag.] Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring") Reported-by: syzbot+5b415c07907a2990d1a3@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000cbb7860611f61147@google.com/T/ Signed-off-by: Chen Ridong Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- security/keys/keyring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 4448758f643a..f331725d5a37 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -772,8 +772,11 @@ ascend_to_node: for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); - if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) - goto descend_to_node; + if (assoc_array_ptr_is_meta(ptr)) { + if (node->back_pointer || + assoc_array_ptr_is_shortcut(ptr)) + goto descend_to_node; + } if (!keyring_ptr_is_keyring(ptr)) continue; From 12a3977538b688326fd781c9908c0cc7fece0e33 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 29 Oct 2024 17:04:06 +0800 Subject: [PATCH 059/174] net: enetc: set MAC address to the VF net_device [ Upstream commit badccd49b93bb945bf4e5cc8707db67cdc5e27e5 ] The MAC address of VF can be configured through the mailbox mechanism of ENETC, but the previous implementation forgot to set the MAC address in net_device, resulting in the SMAC of the sent frames still being the old MAC address. Since the MAC address in the hardware has been changed, Rx cannot receive frames with the DMAC address as the new MAC address. The most obvious phenomenon is that after changing the MAC address, we can see that the MAC address of eno0vf0 has not changed through the "ifconfig eno0vf0" command and the IP address cannot be obtained . root@ls1028ardb:~# ifconfig eno0vf0 down root@ls1028ardb:~# ifconfig eno0vf0 hw ether 00:04:9f:3a:4d:56 up root@ls1028ardb:~# ifconfig eno0vf0 eno0vf0: flags=4163 mtu 1500 ether 66:36:2c:3b:87:76 txqueuelen 1000 (Ethernet) RX packets 794 bytes 69239 (69.2 KB) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 11 bytes 2226 (2.2 KB) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20241029090406.841836-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc_vf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index dfcaac302e24..b15db70769e5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -78,11 +78,18 @@ static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct sockaddr *saddr = addr; + int err; if (!is_valid_ether_addr(saddr->sa_data)) return -EADDRNOTAVAIL; - return enetc_msg_vsi_set_primary_mac_addr(priv, saddr); + err = enetc_msg_vsi_set_primary_mac_addr(priv, saddr); + if (err) + return err; + + eth_hw_addr_set(ndev, saddr->sa_data); + + return 0; } static int enetc_vf_set_features(struct net_device *ndev, From bf9bff13225baf5f658577f7d985fc4933d79527 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Oct 2024 13:46:21 -0400 Subject: [PATCH 060/174] sctp: properly validate chunk size in sctp_sf_ootb() [ Upstream commit 0ead60804b64f5bd6999eec88e503c6a1a242d41 ] A size validation fix similar to that in Commit 50619dbf8db7 ("sctp: add size validation when walking chunks") is also required in sctp_sf_ootb() to address a crash reported by syzbot: BUG: KMSAN: uninit-value in sctp_sf_ootb+0x7f5/0xce0 net/sctp/sm_statefuns.c:3712 sctp_sf_ootb+0x7f5/0xce0 net/sctp/sm_statefuns.c:3712 sctp_do_sm+0x181/0x93d0 net/sctp/sm_sideeffect.c:1166 sctp_endpoint_bh_rcv+0xc38/0xf90 net/sctp/endpointola.c:407 sctp_inq_push+0x2ef/0x380 net/sctp/inqueue.c:88 sctp_rcv+0x3831/0x3b20 net/sctp/input.c:243 sctp4_rcv+0x42/0x50 net/sctp/protocol.c:1159 ip_protocol_deliver_rcu+0xb51/0x13d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x336/0x500 net/ipv4/ip_input.c:233 Reported-by: syzbot+f0cbb34d39392f2746ca@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Link: https://patch.msgid.link/a29ebb6d8b9f8affd0f9abb296faafafe10c17d8.1730223981.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a56749a50e5c..4848d5d50a5f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3752,7 +3752,7 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, } ch = (struct sctp_chunkhdr *)ch_end; - } while (ch_end < skb_tail_pointer(skb)); + } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb)); if (ootb_shut_ack) return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands); From dac989c2a3b53da604db5be4d8106dedb035a939 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Mon, 14 Oct 2024 15:53:13 +0200 Subject: [PATCH 061/174] can: c_can: fix {rx,tx}_errors statistics [ Upstream commit 4d6d26537940f3b3e17138987ed9e4a334780bf7 ] The c_can_handle_bus_err() function was incorrectly incrementing only the receive error counter, even in cases of bit or acknowledgment errors that occur during transmission. The patch fixes the issue by incrementing the appropriate counter based on the type of error. Fixes: 881ff67ad450 ("can: c_can: Added support for Bosch C_CAN controller") Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241014135319.2009782-1-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/c_can/c_can_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index c63f7fc1e691..511615dc3341 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -1011,7 +1011,6 @@ static int c_can_handle_bus_err(struct net_device *dev, /* common for all type of bus errors */ priv->can.can_stats.bus_error++; - stats->rx_errors++; /* propagate the error condition to the CAN stack */ skb = alloc_can_err_skb(dev, &cf); @@ -1027,26 +1026,32 @@ static int c_can_handle_bus_err(struct net_device *dev, case LEC_STUFF_ERROR: netdev_dbg(dev, "stuff error\n"); cf->data[2] |= CAN_ERR_PROT_STUFF; + stats->rx_errors++; break; case LEC_FORM_ERROR: netdev_dbg(dev, "form error\n"); cf->data[2] |= CAN_ERR_PROT_FORM; + stats->rx_errors++; break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); cf->data[3] = CAN_ERR_PROT_LOC_ACK; + stats->tx_errors++; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); cf->data[2] |= CAN_ERR_PROT_BIT1; + stats->tx_errors++; break; case LEC_BIT0_ERROR: netdev_dbg(dev, "bit0 error\n"); cf->data[2] |= CAN_ERR_PROT_BIT0; + stats->tx_errors++; break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; + stats->rx_errors++; break; default: break; From 64aa0771140b3bb568fe6b34b5b52d4684a1526f Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Mon, 28 Oct 2024 12:59:22 -0400 Subject: [PATCH 062/174] ice: change q_index variable type to s16 to store -1 value [ Upstream commit 64502dac974a5d9951d16015fa2e16a14e5f2bb2 ] Fix Flow Director not allowing to re-map traffic to 0th queue when action is configured to drop (and vice versa). The current implementation of ethtool callback in the ice driver forbids change Flow Director action from 0 to -1 and from -1 to 0 with an error, e.g: # ethtool -U eth2 flow-type tcp4 src-ip 1.1.1.1 loc 1 action 0 # ethtool -U eth2 flow-type tcp4 src-ip 1.1.1.1 loc 1 action -1 rmgr: Cannot insert RX class rule: Invalid argument We set the value of `u16 q_index = 0` at the beginning of the function ice_set_fdir_input_set(). In case of "drop traffic" action (which is equal to -1 in ethtool) we store the 0 value. Later, when want to change traffic rule to redirect to queue with index 0 it returns an error caused by duplicate found. Fix this behaviour by change of the type of field `q_index` from u16 to s16 in `struct ice_fdir_fltr`. This allows to store -1 in the field in case of "drop traffic" action. What is more, change the variable type in the function ice_set_fdir_input_set() and assign at the beginning the new `#define ICE_FDIR_NO_QUEUE_IDX` which is -1. Later, if the action is set to another value (point specific queue index) the variable value is overwritten in the function. Fixes: cac2a27cd9ab ("ice: Support IPv4 Flow Director filters") Reviewed-by: Przemek Kitszel Signed-off-by: Mateusz Polchlopek Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 3 ++- drivers/net/ethernet/intel/ice/ice_fdir.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 1839a37139dc..b6bbf2376ef5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1694,11 +1694,12 @@ static int ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp, struct ice_fdir_fltr *input) { - u16 dest_vsi, q_index = 0; + s16 q_index = ICE_FDIR_NO_QUEUE_IDX; u16 orig_q_index = 0; struct ice_pf *pf; struct ice_hw *hw; int flow_type; + u16 dest_vsi; u8 dest_ctl; if (!vsi || !fsp || !input) diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h index b384d2a4ab19..063ea3d51653 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_fdir.h @@ -50,6 +50,8 @@ */ #define ICE_FDIR_IPV4_PKT_FLAG_MF 0x20 +#define ICE_FDIR_NO_QUEUE_IDX -1 + enum ice_fltr_prgm_desc_dest { ICE_FLTR_PRGM_DESC_DEST_DROP_PKT, ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX, @@ -181,7 +183,7 @@ struct ice_fdir_fltr { u16 flex_fltr; /* filter control */ - u16 q_index; + s16 q_index; u16 orig_q_index; u16 dest_vsi; u8 dest_ctl; From 7ad3fb3bfd43feb4e15c81dffd23ac4e55742791 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 16 Oct 2024 11:30:11 +0200 Subject: [PATCH 063/174] i40e: fix race condition by adding filter's intermediate sync state [ Upstream commit f30490e9695ef7da3d0899c6a0293cc7cd373567 ] Fix a race condition in the i40e driver that leads to MAC/VLAN filters becoming corrupted and leaking. Address the issue that occurs under heavy load when multiple threads are concurrently modifying MAC/VLAN filters by setting mac and port VLAN. 1. Thread T0 allocates a filter in i40e_add_filter() within i40e_ndo_set_vf_port_vlan(). 2. Thread T1 concurrently frees the filter in __i40e_del_filter() within i40e_ndo_set_vf_mac(). 3. Subsequently, i40e_service_task() calls i40e_sync_vsi_filters(), which refers to the already freed filter memory, causing corruption. Reproduction steps: 1. Spawn multiple VFs. 2. Apply a concurrent heavy load by running parallel operations to change MAC addresses on the VFs and change port VLANs on the host. 3. Observe errors in dmesg: "Error I40E_AQ_RC_ENOSPC adding RX filters on VF XX, please set promiscuous on manually for VF XX". Exact code for stable reproduction Intel can't open-source now. The fix involves implementing a new intermediate filter state, I40E_FILTER_NEW_SYNC, for the time when a filter is on a tmp_add_list. These filters cannot be deleted from the hash list directly but must be removed using the full process. Fixes: 278e7d0b9d68 ("i40e: store MAC/VLAN filters in a hash with the MAC Address as key") Signed-off-by: Aleksandr Loktionov Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Reviewed-by: Michal Schmidt Tested-by: Michal Schmidt Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 5293fc00938c..22ac8c48ca34 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -790,6 +790,7 @@ enum i40e_filter_state { I40E_FILTER_ACTIVE, /* Added to switch by FW */ I40E_FILTER_FAILED, /* Rejected by FW */ I40E_FILTER_REMOVE, /* To be removed */ + I40E_FILTER_NEW_SYNC, /* New, not sent yet, is in i40e_sync_vsi_filters() */ /* There is no 'removed' state; the filter struct is freed */ }; struct i40e_mac_filter { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 62497f5565c5..b438cf846c41 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -105,6 +105,7 @@ static char *i40e_filter_state_string[] = { "ACTIVE", "FAILED", "REMOVE", + "NEW_SYNC", }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5f46a0677d19..3b165d8f03dc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1251,6 +1251,7 @@ int i40e_count_filters(struct i40e_vsi *vsi) hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC || f->state == I40E_FILTER_ACTIVE) ++cnt; } @@ -1437,6 +1438,8 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, new->f = add_head; new->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new->hlist, tmp_add_list); @@ -1546,6 +1549,8 @@ static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, return -ENOMEM; new_mac->f = add_head; new_mac->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new_mac->hlist, tmp_add_list); @@ -2437,7 +2442,8 @@ static int i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_mac_filter *f) { - bool enable = f->state == I40E_FILTER_NEW; + bool enable = f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC; struct i40e_hw *hw = &vsi->back->hw; int aq_ret; @@ -2611,6 +2617,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* Add it to the hash list */ hlist_add_head(&new->hlist, &tmp_add_list); + f->state = I40E_FILTER_NEW_SYNC; } /* Count the number of active (current and new) VLAN @@ -2762,7 +2769,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) spin_lock_bh(&vsi->mac_filter_hash_lock); hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* Only update the state if we're still NEW */ - if (new->f->state == I40E_FILTER_NEW) + if (new->f->state == I40E_FILTER_NEW || + new->f->state == I40E_FILTER_NEW_SYNC) new->f->state = new->state; hlist_del(&new->hlist); netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); From 76b155e14d9b182ce83d32ada2d0d7219ea8c8dd Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Fri, 1 Nov 2024 17:15:07 +0800 Subject: [PATCH 064/174] net: hns3: fix kernel crash when uninstalling driver [ Upstream commit df3dff8ab6d79edc942464999d06fbaedf8cdd18 ] When the driver is uninstalled and the VF is disabled concurrently, a kernel crash occurs. The reason is that the two actions call function pci_disable_sriov(). The num_VFs is checked to determine whether to release the corresponding resources. During the second calling, num_VFs is not 0 and the resource release function is called. However, the corresponding resource has been released during the first invoking. Therefore, the problem occurs: [15277.839633][T50670] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... [15278.131557][T50670] Call trace: [15278.134686][T50670] klist_put+0x28/0x12c [15278.138682][T50670] klist_del+0x14/0x20 [15278.142592][T50670] device_del+0xbc/0x3c0 [15278.146676][T50670] pci_remove_bus_device+0x84/0x120 [15278.151714][T50670] pci_stop_and_remove_bus_device+0x6c/0x80 [15278.157447][T50670] pci_iov_remove_virtfn+0xb4/0x12c [15278.162485][T50670] sriov_disable+0x50/0x11c [15278.166829][T50670] pci_disable_sriov+0x24/0x30 [15278.171433][T50670] hnae3_unregister_ae_algo_prepare+0x60/0x90 [hnae3] [15278.178039][T50670] hclge_exit+0x28/0xd0 [hclge] [15278.182730][T50670] __se_sys_delete_module.isra.0+0x164/0x230 [15278.188550][T50670] __arm64_sys_delete_module+0x1c/0x30 [15278.193848][T50670] invoke_syscall+0x50/0x11c [15278.198278][T50670] el0_svc_common.constprop.0+0x158/0x164 [15278.203837][T50670] do_el0_svc+0x34/0xcc [15278.207834][T50670] el0_svc+0x20/0x30 For details, see the following figure. rmmod hclge disable VFs ---------------------------------------------------- hclge_exit() sriov_numvfs_store() ... device_lock() pci_disable_sriov() hns3_pci_sriov_configure() pci_disable_sriov() sriov_disable() sriov_disable() if !num_VFs : if !num_VFs : return; return; sriov_del_vfs() sriov_del_vfs() ... ... klist_put() klist_put() ... ... num_VFs = 0; num_VFs = 0; device_unlock(); In this patch, when driver is removing, we get the device_lock() to protect num_VFs, just like sriov_numvfs_store(). Fixes: 0dd8a25f355b ("net: hns3: disable sriov before unload hclge layer") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241101091507.3644584-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hnae3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 67b0bf310daa..9a63fbc69408 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -25,8 +25,11 @@ void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo) pci_id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev); if (!pci_id) continue; - if (IS_ENABLED(CONFIG_PCI_IOV)) + if (IS_ENABLED(CONFIG_PCI_IOV)) { + device_lock(&ae_dev->pdev->dev); pci_disable_sriov(ae_dev->pdev); + device_unlock(&ae_dev->pdev->dev); + } } } EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); From 7efd9a10b896cf93886f7a26c7fbc751b28c4e18 Mon Sep 17 00:00:00 2001 From: Diogo Silva Date: Sat, 2 Nov 2024 16:15:05 +0100 Subject: [PATCH 065/174] net: phy: ti: add PHY_RST_AFTER_CLK_EN flag [ Upstream commit 256748d5480bb3c4b731236c6d6fc86a8e2815d8 ] DP83848 datasheet (section 4.7.2) indicates that the reset pin should be toggled after the clocks are running. Add the PHY_RST_AFTER_CLK_EN to make sure that this indication is respected. In my experience not having this flag enabled would lead to, on some boots, the wrong MII mode being selected if the PHY was initialized on the bootloader and was receiving data during Linux boot. Signed-off-by: Diogo Silva Reviewed-by: Andrew Lunn Fixes: 34e45ad9378c ("net: phy: dp83848: Add TI DP83848 Ethernet PHY") Link: https://patch.msgid.link/20241102151504.811306-1-paissilva@ld-100007.ds1.internal Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/dp83848.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 937061acfc61..351411f0aa6f 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -147,6 +147,8 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl); /* IRQ related */ \ .config_intr = dp83848_config_intr, \ .handle_interrupt = dp83848_handle_interrupt, \ + \ + .flags = PHY_RST_AFTER_CLK_EN, \ } static struct phy_driver dp83848_driver[] = { From 2af6499260c44de778ca52423204749af6178f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Fri, 1 Nov 2024 17:17:29 -0400 Subject: [PATCH 066/174] net: stmmac: Fix unbalanced IRQ wake disable warning on single irq case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 25d70702142ac2115e75e01a0a985c6ea1d78033 ] Commit a23aa0404218 ("net: stmmac: ethtool: Fixed calltrace caused by unbalanced disable_irq_wake calls") introduced checks to prevent unbalanced enable and disable IRQ wake calls. However it only initialized the auxiliary variable on one of the paths, stmmac_request_irq_multi_msi(), missing the other, stmmac_request_irq_single(). Add the same initialization on stmmac_request_irq_single() to prevent "Unbalanced IRQ wake disable" warnings from being printed the first time disable_irq_wake() is called on platforms that run on that code path. Fixes: a23aa0404218 ("net: stmmac: ethtool: Fixed calltrace caused by unbalanced disable_irq_wake calls") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241101-stmmac-unbalanced-wake-single-fix-v1-1-5952524c97f0@collabora.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 045e57c444fd..14e5b94b0b5a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3665,6 +3665,7 @@ static int stmmac_request_irq_single(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ + priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { ret = request_irq(priv->wol_irq, stmmac_interrupt, IRQF_SHARED, dev->name, dev); From f3401e3c8d339ddb6ccb2e3d11ad634b7846a806 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Mon, 4 Nov 2024 16:57:04 +0800 Subject: [PATCH 067/174] virtio_net: Add hash_key_length check [ Upstream commit 3f7d9c1964fcd16d02a8a9d4fd6f6cb60c4cc530 ] Add hash_key_length check in virtnet_probe() to avoid possible out of bound errors when setting/reading the hash key. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Philo Lu Signed-off-by: Xuan Zhuo Acked-by: Joe Damato Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e3e5107adaca..11aa0a7d54cd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3887,6 +3887,12 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); + if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { + dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", + vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); + err = -EINVAL; + goto free; + } vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); From fd4e062fbc07156f8e9d73212d347c744572677e Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 4 Nov 2024 21:01:38 +0800 Subject: [PATCH 068/174] net: arc: fix the device for dma_map_single/dma_unmap_single [ Upstream commit 71803c1dfa29e0d13b99e48fda11107cc8caebc7 ] The ndev->dev and pdev->dev aren't the same device, use ndev->dev.parent which has dma_mask, ndev->dev.parent is just pdev->dev. Or it would cause the following issue: [ 39.933526] ------------[ cut here ]------------ [ 39.938414] WARNING: CPU: 1 PID: 501 at kernel/dma/mapping.c:149 dma_map_page_attrs+0x90/0x1f8 Fixes: f959dcd6ddfd ("dma-direct: Fix potential NULL pointer dereference") Signed-off-by: David Wu Signed-off-by: Johan Jonker Signed-off-by: Andy Yan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/arc/emac_main.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index ba0646b3b122..a32b5f7c0b96 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -111,6 +111,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; + struct device *dev = ndev->dev.parent; unsigned int i; for (i = 0; i < TX_BD_NUM; i++) { @@ -140,7 +141,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) stats->tx_bytes += skb->len; } - dma_unmap_single(&ndev->dev, dma_unmap_addr(tx_buff, addr), + dma_unmap_single(dev, dma_unmap_addr(tx_buff, addr), dma_unmap_len(tx_buff, len), DMA_TO_DEVICE); /* return the sk_buff to system */ @@ -174,6 +175,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) static int arc_emac_rx(struct net_device *ndev, int budget) { struct arc_emac_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; unsigned int work_done; for (work_done = 0; work_done < budget; work_done++) { @@ -223,9 +225,9 @@ static int arc_emac_rx(struct net_device *ndev, int budget) continue; } - addr = dma_map_single(&ndev->dev, (void *)skb->data, + addr = dma_map_single(dev, (void *)skb->data, EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, addr)) { + if (dma_mapping_error(dev, addr)) { if (net_ratelimit()) netdev_err(ndev, "cannot map dma buffer\n"); dev_kfree_skb(skb); @@ -237,7 +239,7 @@ static int arc_emac_rx(struct net_device *ndev, int budget) } /* unmap previosly mapped skb */ - dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), + dma_unmap_single(dev, dma_unmap_addr(rx_buff, addr), dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); pktlen = info & LEN_MASK; @@ -423,6 +425,7 @@ static int arc_emac_open(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = ndev->phydev; + struct device *dev = ndev->dev.parent; int i; phy_dev->autoneg = AUTONEG_ENABLE; @@ -445,9 +448,9 @@ static int arc_emac_open(struct net_device *ndev) if (unlikely(!rx_buff->skb)) return -ENOMEM; - addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data, + addr = dma_map_single(dev, (void *)rx_buff->skb->data, EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, addr)) { + if (dma_mapping_error(dev, addr)) { netdev_err(ndev, "cannot dma map\n"); dev_kfree_skb(rx_buff->skb); return -ENOMEM; @@ -548,6 +551,7 @@ static void arc_emac_set_rx_mode(struct net_device *ndev) static void arc_free_tx_queue(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; unsigned int i; for (i = 0; i < TX_BD_NUM; i++) { @@ -555,7 +559,7 @@ static void arc_free_tx_queue(struct net_device *ndev) struct buffer_state *tx_buff = &priv->tx_buff[i]; if (tx_buff->skb) { - dma_unmap_single(&ndev->dev, + dma_unmap_single(dev, dma_unmap_addr(tx_buff, addr), dma_unmap_len(tx_buff, len), DMA_TO_DEVICE); @@ -579,6 +583,7 @@ static void arc_free_tx_queue(struct net_device *ndev) static void arc_free_rx_queue(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); + struct device *dev = ndev->dev.parent; unsigned int i; for (i = 0; i < RX_BD_NUM; i++) { @@ -586,7 +591,7 @@ static void arc_free_rx_queue(struct net_device *ndev) struct buffer_state *rx_buff = &priv->rx_buff[i]; if (rx_buff->skb) { - dma_unmap_single(&ndev->dev, + dma_unmap_single(dev, dma_unmap_addr(rx_buff, addr), dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); @@ -679,6 +684,7 @@ static netdev_tx_t arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) unsigned int len, *txbd_curr = &priv->txbd_curr; struct net_device_stats *stats = &ndev->stats; __le32 *info = &priv->txbd[*txbd_curr].info; + struct device *dev = ndev->dev.parent; dma_addr_t addr; if (skb_padto(skb, ETH_ZLEN)) @@ -692,10 +698,9 @@ static netdev_tx_t arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_BUSY; } - addr = dma_map_single(&ndev->dev, (void *)skb->data, len, - DMA_TO_DEVICE); + addr = dma_map_single(dev, (void *)skb->data, len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&ndev->dev, addr))) { + if (unlikely(dma_mapping_error(dev, addr))) { stats->tx_dropped++; stats->tx_errors++; dev_kfree_skb_any(skb); From 98ffd585303006672bb3a07fb8532a5e515c8906 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 4 Nov 2024 21:01:39 +0800 Subject: [PATCH 069/174] net: arc: rockchip: fix emac mdio node support [ Upstream commit 0a1c7a7b0adbf595ce7f218609db53749e966573 ] The binding emac_rockchip.txt is converted to YAML. Changed against the original binding is an added MDIO subnode. This make the driver failed to find the PHY, and given the 'mdio has invalid PHY address' it is probably looking in the wrong node. Fix emac_mdio.c so that it can handle both old and new device trees. Fixes: 1dabb74971b3 ("ARM: dts: rockchip: restyle emac nodes") Signed-off-by: Johan Jonker Tested-by: Andy Yan Link: https://lore.kernel.org/r/20220603163539.537-3-jbx6244@gmail.com Signed-off-by: Andy Yan Reviewed-by: Andrew Lunn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/arc/emac_mdio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c index 87f40c2ba904..078b1a72c161 100644 --- a/drivers/net/ethernet/arc/emac_mdio.c +++ b/drivers/net/ethernet/arc/emac_mdio.c @@ -133,6 +133,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv) struct arc_emac_mdio_bus_data *data = &priv->bus_data; struct device_node *np = priv->dev->of_node; const char *name = "Synopsys MII Bus"; + struct device_node *mdio_node; struct mii_bus *bus; int error; @@ -164,7 +165,13 @@ int arc_mdio_probe(struct arc_emac_priv *priv) snprintf(bus->id, MII_BUS_ID_SIZE, "%s", bus->name); - error = of_mdiobus_register(bus, priv->dev->of_node); + /* Backwards compatibility for EMAC nodes without MDIO subnode. */ + mdio_node = of_get_child_by_name(np, "mdio"); + if (!mdio_node) + mdio_node = of_node_get(np); + + error = of_mdiobus_register(bus, mdio_node); + of_node_put(mdio_node); if (error) { mdiobus_free(bus); return dev_err_probe(priv->dev, error, From 3f8f470f3120f8ca83f747bc7aabe4625d7756e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaros=C5=82aw=20Janik?= Date: Wed, 30 Oct 2024 18:18:12 +0100 Subject: [PATCH 070/174] Revert "ALSA: hda/conexant: Mute speakers at suspend / shutdown" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c9363bbb0f68dd1ddb8be7bbfe958cdfcd38d851 upstream. Commit 4f61c8fe3520 ("ALSA: hda/conexant: Mute speakers at suspend / shutdown") mutes speakers on system shutdown or whenever HDA controller is suspended by PM; this however interacts badly with Thinkpad's ACPI firmware behavior which uses beeps to signal various events (enter/leave suspend or hibernation, AC power connect/disconnect, low battery, etc.); now those beeps are either muted altogether (for suspend/hibernate/ shutdown related events) or work more or less randomly (eg. AC plug/unplug is only audible when you are playing music at the moment, because HDA device is likely in suspend mode otherwise). Since the original bug report mentioned in 4f61c8fe3520 complained about Lenovo's Thinkpad laptop - revert this commit altogether. Fixes: 4f61c8fe3520 ("ALSA: hda/conexant: Mute speakers at suspend / shutdown") Signed-off-by: Jarosław Janik Link: https://patch.msgid.link/20241030171813.18941-2-jaroslaw.janik@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 5833623f6ffa..a14b9cb48f69 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -205,8 +205,6 @@ static void cx_auto_shutdown(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - snd_hda_gen_shutup_speakers(codec); - /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); From d6386b279d7581a1e04454e9cb5ac303157082d3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 13:29:43 +0200 Subject: [PATCH 071/174] media: stb0899_algo: initialize cfr before using it commit 2d861977e7314f00bf27d0db17c11ff5e85e609a upstream. The loop at stb0899_search_carrier() starts with a random value for cfr, as reported by Coverity. Initialize it to zero, just like stb0899_dvbs_algo() to ensure that carrier search won't bail out. Fixes: 8bd135bab91f ("V4L/DVB (9375): Add STB0899 support") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/stb0899_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/stb0899_algo.c b/drivers/media/dvb-frontends/stb0899_algo.c index df89c33dac23..40537c4ccb0d 100644 --- a/drivers/media/dvb-frontends/stb0899_algo.c +++ b/drivers/media/dvb-frontends/stb0899_algo.c @@ -269,7 +269,7 @@ static enum stb0899_status stb0899_search_carrier(struct stb0899_state *state) short int derot_freq = 0, last_derot_freq = 0, derot_limit, next_loop = 3; int index = 0; - u8 cfr[2]; + u8 cfr[2] = {0}; u8 reg; internal->status = NOCARRIER; From b751a96025275c17f04083cbfe856822f1658946 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 15:23:01 +0200 Subject: [PATCH 072/174] media: dvbdev: prevent the risk of out of memory access [ Upstream commit 972e63e895abbe8aa1ccbdbb4e6362abda7cd457 ] The dvbdev contains a static variable used to store dvb minors. The behavior of it depends if CONFIG_DVB_DYNAMIC_MINORS is set or not. When not set, dvb_register_device() won't check for boundaries, as it will rely that a previous call to dvb_register_adapter() would already be enforcing it. On a similar way, dvb_device_open() uses the assumption that the register functions already did the needed checks. This can be fragile if some device ends using different calls. This also generate warnings on static check analysers like Coverity. So, add explicit guards to prevent potential risk of OOM issues. Fixes: 5dd3f3071070 ("V4L/DVB (9361): Dynamic DVB minor allocation") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-core/dvbdev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 04b7ce479fc3..d1212acb7093 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -86,10 +86,15 @@ static DECLARE_RWSEM(minor_rwsem); static int dvb_device_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev; + unsigned int minor = iminor(inode); + + if (minor >= MAX_DVB_MINORS) + return -ENODEV; mutex_lock(&dvbdev_mutex); down_read(&minor_rwsem); - dvbdev = dvb_minors[iminor(inode)]; + + dvbdev = dvb_minors[minor]; if (dvbdev && dvbdev->fops) { int err = 0; @@ -529,7 +534,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (dvb_minors[minor] == NULL) break; - if (minor == MAX_DVB_MINORS) { + if (minor >= MAX_DVB_MINORS) { if (new_node) { list_del (&new_node->list_head); kfree(dvbdevfops); @@ -544,6 +549,14 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } #else minor = nums2minor(adap->num, type, id); + if (minor >= MAX_DVB_MINORS) { + dvb_media_device_free(dvbdev); + list_del(&dvbdev->list_head); + kfree(dvbdev); + *pdvbdev = NULL; + mutex_unlock(&dvbdev_register_lock); + return ret; + } #endif dvbdev->minor = minor; dvb_minors[minor] = dvb_device_get(dvbdev); From fd6d84b8ee157cb4c304485f73fbc620b807d212 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 16:05:16 +0200 Subject: [PATCH 073/174] media: dvb_frontend: don't play tricks with underflow values [ Upstream commit 9883a4d41aba7612644e9bb807b971247cea9b9d ] fepriv->auto_sub_step is unsigned. Setting it to -1 is just a trick to avoid calling continue, as reported by Coverity. It relies to have this code just afterwards: if (!ready) fepriv->auto_sub_step++; Simplify the code by simply setting it to zero and use continue to return to the while loop. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-core/dvb_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index a1a3dbb0e738..a997cffbc8ea 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -443,8 +443,8 @@ static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wra default: fepriv->auto_step++; - fepriv->auto_sub_step = -1; /* it'll be incremented to 0 in a moment */ - break; + fepriv->auto_sub_step = 0; + continue; } if (!ready) fepriv->auto_sub_step++; From d98c63c00ca16dfda8e80611b3fe6a48bc84d674 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 12:25:09 +0200 Subject: [PATCH 074/174] media: adv7604: prevent underflow condition when reporting colorspace [ Upstream commit 50b9fa751d1aef5d262bde871c70a7f44262f0bc ] Currently, adv76xx_log_status() reads some date using io_read() which may return negative values. The current logic doesn't check such errors, causing colorspace to be reported on a wrong way at adv76xx_log_status(), as reported by Coverity. If I/O error happens there, print a different message, instead of reporting bogus messages to userspace. Fixes: 54450f591c99 ("[media] adv7604: driver for the Analog Devices ADV7604 video decoder") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/i2c/adv7604.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index bda0c547ce44..96c2fa2c3b7a 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2516,10 +2516,10 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) const struct adv76xx_chip_info *info = state->info; struct v4l2_dv_timings timings; struct stdi_readback stdi; - u8 reg_io_0x02 = io_read(sd, 0x02); + int ret; + u8 reg_io_0x02; u8 edid_enabled; u8 cable_det; - static const char * const csc_coeff_sel_rb[16] = { "bypassed", "YPbPr601 -> RGB", "reserved", "YPbPr709 -> RGB", "reserved", "RGB -> YPbPr601", "reserved", "RGB -> YPbPr709", @@ -2618,13 +2618,21 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) v4l2_info(sd, "-----Color space-----\n"); v4l2_info(sd, "RGB quantization range ctrl: %s\n", rgb_quantization_range_txt[state->rgb_quantization_range]); - v4l2_info(sd, "Input color space: %s\n", - input_color_space_txt[reg_io_0x02 >> 4]); - v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", - (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", - (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? - "(16-235)" : "(0-255)", - (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); + + ret = io_read(sd, 0x02); + if (ret < 0) { + v4l2_info(sd, "Can't read Input/Output color space\n"); + } else { + reg_io_0x02 = ret; + + v4l2_info(sd, "Input color space: %s\n", + input_color_space_txt[reg_io_0x02 >> 4]); + v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", + (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", + (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? + "(16-235)" : "(0-255)", + (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); + } v4l2_info(sd, "Color space conversion: %s\n", csc_coeff_sel_rb[cp_read(sd, info->cp_csc) >> 4]); From 3b4f6966d7ea24746321973e7d45128e6c6f9a97 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 30 Oct 2024 12:02:53 +0100 Subject: [PATCH 075/174] scsi: sd_zbc: Use kvzalloc() to allocate REPORT ZONES buffer [ Upstream commit 7ce3e6107103214d354a16729a472f588be60572 ] We have two reports of failed memory allocation in btrfs' code which is calling into report zones. Both of these reports have the following signature coming from __vmalloc_area_node(): kworker/u17:5: vmalloc error: size 0, failed to allocate pages, mode:0x10dc2(GFP_KERNEL|__GFP_HIGHMEM|__GFP_NORETRY|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0 Further debugging showed these where allocations of one sector (512 bytes) and at least one of the reporter's systems where low on memory, so going through the overhead of allocating a vm area failed. Switching the allocation from __vmalloc() to kvzalloc() avoids the overhead of vmalloc() on small allocations and succeeds. Note: the buffer is already freed using kvfree() so there's no need to adjust the free path. Cc: Qu Wenru Cc: Naohiro Aota Link: https://github.com/kdave/btrfs-progs/issues/779 Link: https://github.com/kdave/btrfs-progs/issues/915 Fixes: 23a50861adda ("scsi: sd_zbc: Cleanup sd_zbc_alloc_report_buffer()") Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20241030110253.11718-1-jth@kernel.org Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd_zbc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 4c35b4a91635..4c78288ffa72 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -216,8 +216,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); while (bufsize >= SECTOR_SIZE) { - buf = __vmalloc(bufsize, - GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY); + buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); if (buf) { *buflen = bufsize; return buf; From 42a26e971e75934fb4ea1b8a845241f286b64487 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 1 Nov 2024 21:55:13 +0300 Subject: [PATCH 076/174] ALSA: firewire-lib: fix return value on fail in amdtp_tscm_init() [ Upstream commit 8abbf1f01d6a2ef9f911f793e30f7382154b5a3a ] If amdtp_stream_init() fails in amdtp_tscm_init(), the latter returns zero, though it's supposed to return error code, which is checked inside init_stream() in file tascam-stream.c. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 47faeea25ef3 ("ALSA: firewire-tascam: add data block processing layer") Signed-off-by: Murad Masimov Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20241101185517.1819-1-m.masimov@maxima.ru Signed-off-by: Sasha Levin --- sound/firewire/tascam/amdtp-tascam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/tascam/amdtp-tascam.c b/sound/firewire/tascam/amdtp-tascam.c index 64d66a802545..ad185ff3209d 100644 --- a/sound/firewire/tascam/amdtp-tascam.c +++ b/sound/firewire/tascam/amdtp-tascam.c @@ -244,7 +244,7 @@ int amdtp_tscm_init(struct amdtp_stream *s, struct fw_unit *unit, err = amdtp_stream_init(s, unit, dir, flags, fmt, process_ctx_payloads, sizeof(struct amdtp_tscm)); if (err < 0) - return 0; + return err; if (dir == AMDTP_OUT_STREAM) { // Use fixed value for FDF field. From 648e7f59e8495373ae3c9737256ae52087b94f6c Mon Sep 17 00:00:00 2001 From: Emil Dahl Juhl Date: Tue, 15 Oct 2024 19:18:26 +0200 Subject: [PATCH 077/174] tools/lib/thermal: Fix sampling handler context ptr [ Upstream commit fcd54cf480c87b96313a97dbf898c644b7bb3a2e ] The sampling handler, provided by the user alongside a void* context, was invoked with an internal structure instead of the user context. Correct the invocation of the sampling handler to pass the user context pointer instead. Note that the approach taken is similar to that in events.c, and will reduce the chances of this mistake happening if additional sampling callbacks are added. Fixes: 47c4b0de080a ("tools/lib/thermal: Add a thermal library") Signed-off-by: Emil Dahl Juhl Link: https://lore.kernel.org/r/20241015171826.170154-1-emdj@bang-olufsen.dk Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- tools/lib/thermal/sampling.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c index 70577423a9f0..f67c1f9ea1d7 100644 --- a/tools/lib/thermal/sampling.c +++ b/tools/lib/thermal/sampling.c @@ -16,6 +16,8 @@ static int handle_thermal_sample(struct nl_msg *n, void *arg) struct thermal_handler_param *thp = arg; struct thermal_handler *th = thp->th; + arg = thp->arg; + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); switch (genlhdr->cmd) { From 2e9a53eef28d1f6ff9d4991825d8cf83e122d9c7 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 18 Oct 2024 15:31:36 +0800 Subject: [PATCH 078/174] thermal/of: support thermal zones w/o trips subnode [ Upstream commit 725f31f300e300a9d94976bd8f1db6e746f95f63 ] Although the current device tree binding of thermal zones require the trips subnode, the binding in kernel v5.15 does not require it, and many device trees shipped with the kernel, for example, allwinner/sun50i-a64.dtsi and mediatek/mt8183-kukui.dtsi in ARM64, still comply to the old binding and contain no trips subnode. Allow the code to successfully register thermal zones w/o trips subnode for DT binding compatibility now. Furtherly, the inconsistency between DTs and bindings should be resolved by either adding empty trips subnode or dropping the trips subnode requirement. Fixes: d0c75fa2c17f ("thermal/of: Initialize trip points separately") Signed-off-by: Icenowy Zheng [wenst@chromium.org: Reworked logic and kernel log messages] Signed-off-by: Chen-Yu Tsai Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20241018073139.1268995-1-wenst@chromium.org Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/thermal/thermal_of.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 323c8cd17148..07476147559a 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -238,18 +238,15 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n struct device_node *trips; int ret, count; + *ntrips = 0; + trips = of_get_child_by_name(np, "trips"); - if (!trips) { - pr_err("Failed to find 'trips' node\n"); - return ERR_PTR(-EINVAL); - } + if (!trips) + return NULL; count = of_get_child_count(trips); - if (!count) { - pr_err("No trip point defined\n"); - ret = -EINVAL; - goto out_of_node_put; - } + if (!count) + return NULL; tt = kzalloc(sizeof(*tt) * count, GFP_KERNEL); if (!tt) { @@ -272,7 +269,6 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n out_kfree: kfree(tt); - *ntrips = 0; out_of_node_put: of_node_put(trips); @@ -619,11 +615,14 @@ struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, trips = thermal_of_trips_init(np, &ntrips); if (IS_ERR(trips)) { - pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); + pr_err("Failed to parse trip points for %pOFn id=%d\n", sensor, id); ret = PTR_ERR(trips); goto out_kfree_of_ops; } + if (!trips) + pr_info("No trip points found for %pOFn id=%d\n", sensor, id); + ret = thermal_of_monitor_init(np, &delay, &pdelay); if (ret) { pr_err("Failed to initialize monitoring delays from %pOFn\n", np); From 4f1d74f74752eab8af6b8b28797dc6490d57374c Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 5 Nov 2024 15:02:42 +0100 Subject: [PATCH 079/174] ASoC: stm32: spdifrx: fix dma channel release in stm32_spdifrx_remove [ Upstream commit 9bb4af400c386374ab1047df44c508512c08c31f ] In case of error when requesting ctrl_chan DMA channel, ctrl_chan is not null. So the release of the dma channel leads to the following issue: [ 4.879000] st,stm32-spdifrx 500d0000.audio-controller: dma_request_slave_channel error -19 [ 4.888975] Unable to handle kernel NULL pointer dereference at virtual address 000000000000003d [...] [ 5.096577] Call trace: [ 5.099099] dma_release_channel+0x24/0x100 [ 5.103235] stm32_spdifrx_remove+0x24/0x60 [snd_soc_stm32_spdifrx] [ 5.109494] stm32_spdifrx_probe+0x320/0x4c4 [snd_soc_stm32_spdifrx] To avoid this issue, release channel only if the pointer is valid. Fixes: 794df9448edb ("ASoC: stm32: spdifrx: manage rebind issue") Signed-off-by: Amelie Delaunay Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20241105140242.527279-1-olivier.moysan@foss.st.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_spdifrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_spdifrx.c b/sound/soc/stm/stm32_spdifrx.c index d399c906bb92..e66382680e09 100644 --- a/sound/soc/stm/stm32_spdifrx.c +++ b/sound/soc/stm/stm32_spdifrx.c @@ -943,7 +943,7 @@ static int stm32_spdifrx_remove(struct platform_device *pdev) { struct stm32_spdifrx_data *spdifrx = platform_get_drvdata(pdev); - if (spdifrx->ctrl_chan) + if (!IS_ERR(spdifrx->ctrl_chan)) dma_release_channel(spdifrx->ctrl_chan); if (spdifrx->dmab) From 5e1523076acf95b4ea68d19b6f27e6891267cc24 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 11:38:10 +0200 Subject: [PATCH 080/174] media: ar0521: don't overflow when checking PLL values commit 438d3085ba5b8b5bfa5290faa594e577f6ac9aa7 upstream. The PLL checks are comparing 64 bit integers with 32 bit ones, as reported by Coverity. Depending on the values of the variables, this may underflow. Fix it ensuring that both sides of the expression are u64. Fixes: 852b50aeed15 ("media: On Semi AR0521 sensor driver") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Acked-by: Sakari Ailus Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ar0521.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/ar0521.c b/drivers/media/i2c/ar0521.c index 02856a733238..1ede57958410 100644 --- a/drivers/media/i2c/ar0521.c +++ b/drivers/media/i2c/ar0521.c @@ -223,10 +223,10 @@ static u32 calc_pll(struct ar0521_dev *sensor, int num, u32 freq, u16 *pre_ptr, continue; /* Minimum value */ if (new_mult > 254) break; /* Maximum, larger pre won't work either */ - if (sensor->extclk_freq * (u64)new_mult < AR0521_PLL_MIN * + if (sensor->extclk_freq * (u64)new_mult < (u64)AR0521_PLL_MIN * new_pre) continue; - if (sensor->extclk_freq * (u64)new_mult > AR0521_PLL_MAX * + if (sensor->extclk_freq * (u64)new_mult > (u64)AR0521_PLL_MAX * new_pre) break; /* Larger pre won't work either */ new_pll = div64_round_up(sensor->extclk_freq * (u64)new_mult, From c85db2d4432de4ff9d97006691ce2dcb5bda660e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 11:10:31 +0200 Subject: [PATCH 081/174] media: s5p-jpeg: prevent buffer overflows commit 14a22762c3daeac59a5a534e124acbb4d7a79b3a upstream. The current logic allows word to be less than 2. If this happens, there will be buffer overflows, as reported by smatch. Add extra checks to prevent it. While here, remove an unused word = 0 assignment. Fixes: 6c96dbbc2aa9 ("[media] s5p-jpeg: add support for 5433") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jacek Anaszewski Signed-off-by: Greg Kroah-Hartman --- .../media/platform/samsung/s5p-jpeg/jpeg-core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c b/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c index 55814041b8d8..d8d1f17cee83 100644 --- a/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c @@ -775,11 +775,14 @@ static void exynos4_jpeg_parse_decode_h_tbl(struct s5p_jpeg_ctx *ctx) (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.sos + 2; jpeg_buffer.curr = 0; - word = 0; - if (get_word_be(&jpeg_buffer, &word)) return; - jpeg_buffer.size = (long)word - 2; + + if (word < 2) + jpeg_buffer.size = 0; + else + jpeg_buffer.size = (long)word - 2; + jpeg_buffer.data += 2; jpeg_buffer.curr = 0; @@ -1058,6 +1061,7 @@ static int get_word_be(struct s5p_jpeg_buffer *buf, unsigned int *word) if (byte == -1) return -1; *word = (unsigned int)byte | temp; + return 0; } @@ -1145,7 +1149,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; sof = jpeg_buffer.curr; /* after 0xffc0 */ sof_len = length; @@ -1176,7 +1180,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; if (n_dqt >= S5P_JPEG_MAX_MARKER) return false; @@ -1189,7 +1193,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; if (n_dht >= S5P_JPEG_MAX_MARKER) return false; @@ -1214,6 +1218,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; + /* No need to check underflows as skip() does it */ skip(&jpeg_buffer, length); break; } From fbefe31e4598cdb0889eee2e74c995b2212efb08 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 12:14:11 +0200 Subject: [PATCH 082/174] media: cx24116: prevent overflows on SNR calculus commit 576a307a7650bd544fbb24df801b9b7863b85e2f upstream. as reported by Coverity, if reading SNR registers fail, a negative number will be returned, causing an underflow when reading SNR registers. Prevent that. Fixes: 8953db793d5b ("V4L/DVB (9178): cx24116: Add module parameter to return SNR as ESNO.") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/cx24116.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c index 8b978a9f74a4..f5dd3a81725a 100644 --- a/drivers/media/dvb-frontends/cx24116.c +++ b/drivers/media/dvb-frontends/cx24116.c @@ -741,6 +741,7 @@ static int cx24116_read_snr_pct(struct dvb_frontend *fe, u16 *snr) { struct cx24116_state *state = fe->demodulator_priv; u8 snr_reading; + int ret; static const u32 snr_tab[] = { /* 10 x Table (rounded up) */ 0x00000, 0x0199A, 0x03333, 0x04ccD, 0x06667, 0x08000, 0x0999A, 0x0b333, 0x0cccD, 0x0e667, @@ -749,7 +750,11 @@ static int cx24116_read_snr_pct(struct dvb_frontend *fe, u16 *snr) dprintk("%s()\n", __func__); - snr_reading = cx24116_readreg(state, CX24116_REG_QUALITY0); + ret = cx24116_readreg(state, CX24116_REG_QUALITY0); + if (ret < 0) + return ret; + + snr_reading = ret; if (snr_reading >= 0xa0 /* 100% */) *snr = 0xffff; From 4b132a464f9fa23f3c1d28de9679a525bb10c99c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 16 Oct 2024 11:24:15 +0200 Subject: [PATCH 083/174] media: pulse8-cec: fix data timestamp at pulse8_setup() commit ba9cf6b430433e57bfc8072364e944b7c0eca2a4 upstream. As pointed by Coverity, there is a hidden overflow condition there. As date is signed and u8 is unsigned, doing: date = (data[0] << 24) With a value bigger than 07f will make all upper bits of date 0xffffffff. This can be demonstrated with this small code: typedef int64_t time64_t; typedef uint8_t u8; int main(void) { u8 data[] = { 0xde ,0xad , 0xbe, 0xef }; time64_t date; date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; printf("Invalid data = 0x%08lx\n", date); date = ((unsigned)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; printf("Expected data = 0x%08lx\n", date); return 0; } Fix it by converting the upper bit calculation to unsigned. Fixes: cea28e7a55e7 ("media: pulse8-cec: reorganize function order") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/usb/pulse8/pulse8-cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c index ba67587bd43e..171366fe3544 100644 --- a/drivers/media/cec/usb/pulse8/pulse8-cec.c +++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c @@ -685,7 +685,7 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 4); if (err) return err; - date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + date = ((unsigned)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; dev_info(pulse8->dev, "Firmware build date %ptT\n", &date); dev_dbg(pulse8->dev, "Persistent config:\n"); From c63c30c9d9f2c8de34b16cd2b8400240533b914e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 16 Oct 2024 11:53:15 +0200 Subject: [PATCH 084/174] media: v4l2-tpg: prevent the risk of a division by zero commit e6a3ea83fbe15d4818d01804e904cbb0e64e543b upstream. As reported by Coverity, the logic at tpg_precalculate_line() blindly rescales the buffer even when scaled_witdh is equal to zero. If this ever happens, this will cause a division by zero. Instead, add a WARN_ON_ONCE() to trigger such cases and return without doing any precalculation. Fixes: 63881df94d3e ("[media] vivid: add the Test Pattern Generator") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index fe30f5b0050d..a635ce4cd643 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -1795,6 +1795,9 @@ static void tpg_precalculate_line(struct tpg_data *tpg) unsigned p; unsigned x; + if (WARN_ON_ONCE(!tpg->src_width || !tpg->scaled_width)) + return; + switch (tpg->pattern) { case TPG_PAT_GREEN: contrast = TPG_COLOR_100_RED; From f7503fd2d706996926b6976ba7fbfc9b7d917555 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 15 Oct 2024 14:23:38 +0200 Subject: [PATCH 085/174] media: v4l2-ctrls-api: fix error handling for v4l2_g_ctrl() commit 4c76f331a9a173ac8fe1297a9231c2a38f88e368 upstream. As detected by Coverity, the error check logic at get_ctrl() is broken: if ptr_to_user() fails to fill a control due to an error, no errors are returned and v4l2_g_ctrl() returns success on a failed operation, which may cause applications to fail. Add an error check at get_ctrl() and ensure that it will be returned to userspace without filling the control value if get_ctrl() fails. Fixes: 71c689dc2e73 ("media: v4l2-ctrls: split up into four source files") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ctrls-api.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c b/drivers/media/v4l2-core/v4l2-ctrls-api.c index 002ea6588edf..64b0c3ef27f5 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-api.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c @@ -753,9 +753,10 @@ static int get_ctrl(struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c) for (i = 0; i < master->ncontrols; i++) cur_to_new(master->cluster[i]); ret = call_op(master, g_volatile_ctrl); - new_to_user(c, ctrl); + if (!ret) + ret = new_to_user(c, ctrl); } else { - cur_to_user(c, ctrl); + ret = cur_to_user(c, ctrl); } v4l2_ctrl_unlock(master); return ret; @@ -770,7 +771,10 @@ int v4l2_g_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_control *control) if (!ctrl || !ctrl->is_int) return -EINVAL; ret = get_ctrl(ctrl, &c); - control->value = c.value; + + if (!ret) + control->value = c.value; + return ret; } EXPORT_SYMBOL(v4l2_g_ctrl); @@ -811,10 +815,11 @@ static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, int ret; v4l2_ctrl_lock(ctrl); - user_to_new(c, ctrl); - ret = set_ctrl(fh, ctrl, 0); + ret = user_to_new(c, ctrl); if (!ret) - cur_to_user(c, ctrl); + ret = set_ctrl(fh, ctrl, 0); + if (!ret) + ret = cur_to_user(c, ctrl); v4l2_ctrl_unlock(ctrl); return ret; } From 4ee68cf5305d253ccfbdc4028ebf548153694df1 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 1 Oct 2024 16:56:22 +0200 Subject: [PATCH 086/174] can: mcp251xfd: mcp251xfd_get_tef_len(): fix length calculation commit 3c1c18551e6ac1b988d0a05c5650e3f6c95a1b8a upstream. Commit b8e0ddd36ce9 ("can: mcp251xfd: tef: prepare to workaround broken TEF FIFO tail index erratum") introduced mcp251xfd_get_tef_len() to get the number of unhandled transmit events from the Transmit Event FIFO (TEF). As the TEF has no head pointer, the driver uses the TX FIFO's tail pointer instead, assuming that send frames are completed. However the check for the TEF being full was not correct. This leads to the driver stop working if the TEF is full. Fix the TEF full check by assuming that if, from the driver's point of view, there are no free TX buffers in the chip and the TX FIFO is empty, all messages must have been sent and the TEF must therefore be full. Reported-by: Sven Schuchmann Closes: https://patch.msgid.link/FR3P281MB155216711EFF900AD9791B7ED9692@FR3P281MB1552.DEUP281.PROD.OUTLOOK.COM Fixes: b8e0ddd36ce9 ("can: mcp251xfd: tef: prepare to workaround broken TEF FIFO tail index erratum") Tested-by: Sven Schuchmann Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20241104-mcp251xfd-fix-length-calculation-v3-1-608b6e7e2197@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c index d4df5ccb60e3..493daaa0c7a3 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c @@ -16,9 +16,9 @@ #include "mcp251xfd.h" -static inline bool mcp251xfd_tx_fifo_sta_full(u32 fifo_sta) +static inline bool mcp251xfd_tx_fifo_sta_empty(u32 fifo_sta) { - return !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF); + return fifo_sta & MCP251XFD_REG_FIFOSTA_TFERFFIF; } static inline int @@ -122,7 +122,11 @@ mcp251xfd_get_tef_len(struct mcp251xfd_priv *priv, u8 *len_p) if (err) return err; - if (mcp251xfd_tx_fifo_sta_full(fifo_sta)) { + /* If the chip says the TX-FIFO is empty, but there are no TX + * buffers free in the ring, we assume all have been sent. + */ + if (mcp251xfd_tx_fifo_sta_empty(fifo_sta) && + mcp251xfd_get_tx_free(tx_ring) == 0) { *len_p = tx_ring->obj_num; return 0; } From 49500cfd14c714cb051be61551e585e275dfd8bc Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 25 Oct 2024 14:34:40 +0200 Subject: [PATCH 087/174] can: mcp251xfd: mcp251xfd_ring_alloc(): fix coalescing configuration when switching CAN modes commit eb9a839b3d8a989be5970035a5cf29bcd6ffd24d upstream. Since commit 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode"), the current ring and coalescing configuration is passed to can_ram_get_layout(). That fixed the issue when switching between CAN-CC and CAN-FD mode with configured ring (rx, tx) and/or coalescing parameters (rx-frames-irq, tx-frames-irq). However 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode"), introduced a regression when switching CAN modes with disabled coalescing configuration: Even if the previous CAN mode has no coalescing configured, the new mode is configured with active coalescing. This leads to delayed receiving of CAN-FD frames. This comes from the fact, that ethtool uses usecs = 0 and max_frames = 1 to disable coalescing, however the driver uses internally priv->{rx,tx}_obj_num_coalesce_irq = 0 to indicate disabled coalescing. Fix the regression by assigning struct ethtool_coalesce ec->{rx,tx}_max_coalesced_frames_irq = 1 if coalescing is disabled in the driver as can_ram_get_layout() expects this. Reported-by: https://github.com/vdh-robothania Closes: https://github.com/raspberrypi/linux/issues/6407 Fixes: 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode") Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241025-mcp251xfd-fix-coalesing-v1-1-9d11416de1df@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index a894cb1fb9bf..29242dde814d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -2,7 +2,7 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2019, 2020, 2021 Pengutronix, +// Copyright (c) 2019, 2020, 2021, 2024 Pengutronix, // Marc Kleine-Budde // // Based on: @@ -473,9 +473,11 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) }; const struct ethtool_coalesce ec = { .rx_coalesce_usecs_irq = priv->rx_coalesce_usecs_irq, - .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq, + .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq == 0 ? + 1 : priv->rx_obj_num_coalesce_irq, .tx_coalesce_usecs_irq = priv->tx_coalesce_usecs_irq, - .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq, + .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq == 0 ? + 1 : priv->tx_obj_num_coalesce_irq, }; struct can_ram_layout layout; From f56446ba5378d19e31040b548a14ee9a8f1500ea Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 2 Nov 2024 18:46:38 +0900 Subject: [PATCH 088/174] ksmbd: fix slab-use-after-free in ksmbd_smb2_session_create commit 0a77715db22611df50b178374c51e2ba0d58866e upstream. There is a race condition between ksmbd_smb2_session_create and ksmbd_expire_session. This patch add missing sessions_table_lock while adding/deleting session from global session table. Cc: stable@vger.kernel.org # v5.15+ Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/mgmt/user_session.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 844db95e6651..0cbcf8bf905f 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -174,6 +174,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) unsigned long id; struct ksmbd_session *sess; + down_write(&sessions_table_lock); down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { if (atomic_read(&sess->refcnt) == 0 && @@ -187,6 +188,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) } } up_write(&conn->session_lock); + up_write(&sessions_table_lock); } int ksmbd_session_register(struct ksmbd_conn *conn, @@ -228,7 +230,6 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } } - up_write(&sessions_table_lock); down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { @@ -248,6 +249,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } up_write(&conn->session_lock); + up_write(&sessions_table_lock); } struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, From d8664ce789bd46290c59a00da6897252f92c237d Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 28 Oct 2024 08:28:30 +0900 Subject: [PATCH 089/174] ksmbd: Fix the missing xa_store error check commit 3abab905b14f4ba756d413f37f1fb02b708eee93 upstream. xa_store() can fail, it return xa_err(-EINVAL) if the entry cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation failed, so check error for xa_store() to fix it. Cc: stable@vger.kernel.org Fixes: b685757c7b08 ("ksmbd: Implements sess->rpc_handle_list as xarray") Signed-off-by: Jinjie Ruan Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/mgmt/user_session.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 0cbcf8bf905f..710ac7b976fa 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -90,7 +90,7 @@ static int __rpc_method(char *rpc_name) int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) { - struct ksmbd_session_rpc *entry; + struct ksmbd_session_rpc *entry, *old; struct ksmbd_rpc_command *resp; int method; @@ -106,16 +106,19 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) entry->id = ksmbd_ipc_id_alloc(); if (entry->id < 0) goto free_entry; - xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL); + old = xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL); + if (xa_is_err(old)) + goto free_id; resp = ksmbd_rpc_open(sess, entry->id); if (!resp) - goto free_id; + goto erase_xa; kvfree(resp); return entry->id; -free_id: +erase_xa: xa_erase(&sess->rpc_handle_list, entry->id); +free_id: ksmbd_rpc_id_free(entry->id); free_entry: kfree(entry); From f7557bbca40d4ca8bb1c6c940ac6c95078bd0827 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Nov 2024 13:40:41 +0900 Subject: [PATCH 090/174] ksmbd: fix slab-use-after-free in smb3_preauth_hash_rsp commit b8fc56fbca7482c1e5c0e3351c6ae78982e25ada upstream. ksmbd_user_session_put should be called under smb3_preauth_hash_rsp(). It will avoid freeing session before calling smb3_preauth_hash_rsp(). Cc: stable@vger.kernel.org # v5.15+ Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 09ebcf39d5bc..da5b9678ad05 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -238,11 +238,11 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, } while (is_chained == true); send: - if (work->sess) - ksmbd_user_session_put(work->sess); if (work->tcon) ksmbd_tree_connect_put(work->tcon); smb3_preauth_hash_rsp(work); + if (work->sess) + ksmbd_user_session_put(work->sess); if (work->sess && work->sess->enc && work->encrypted && conn->ops->encrypt_resp) { rc = conn->ops->encrypt_resp(work); From 284e213f0ef1a49ae316e52e24a1ab53967b0c66 Mon Sep 17 00:00:00 2001 From: Erik Schumacher Date: Fri, 25 Oct 2024 08:37:00 +0000 Subject: [PATCH 091/174] pwm: imx-tpm: Use correct MODULO value for EPWM mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc6a931d1f3b412263d515fd93b21fc0ca5147fe upstream. The modulo register defines the period of the edge-aligned PWM mode (which is the only mode implemented). The reference manual states: "The EPWM period is determined by (MOD + 0001h) ..." So the value that is written to the MOD register must therefore be one less than the calculated period length. Return -EINVAL if the calculated length is already zero. A correct MODULO value is particularly relevant if the PWM has to output a high frequency due to a low period value. Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support") Cc: stable@vger.kernel.org Signed-off-by: Erik Schumacher Link: https://lore.kernel.org/r/1a3890966d68b9f800d457cbf095746627495e18.camel@iris-sensing.com Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-imx-tpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 318dc0be974b..48eb32d9aae9 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -106,7 +106,9 @@ static int pwm_imx_tpm_round_state(struct pwm_chip *chip, p->prescale = prescale; period_count = (clock_unit + ((1 << prescale) >> 1)) >> prescale; - p->mod = period_count; + if (period_count == 0) + return -EINVAL; + p->mod = period_count - 1; /* calculate real period HW can support */ tmp = (u64)period_count << prescale; From 91139f33b07fc66b96fd529ff64a4014a9a935e5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:39:36 -0400 Subject: [PATCH 092/174] drm/amdgpu: Adjust debugfs eviction and IB access permissions commit f790a2c494c4ef587eeeb9fca20124de76a1646f upstream. Users should not be able to run these. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit 7ba9395430f611cfc101b1c2687732baafa239d5) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 06ab6066da61..3cca3f07f34d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2009,11 +2009,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); - debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, + debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); - debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, + debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev, &amdgpu_evict_gtt_fops); - debugfs_create_file("amdgpu_test_ib", 0444, root, adev, + debugfs_create_file("amdgpu_test_ib", 0400, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); From 25d7e84343e1235b667cf5226c3934fdf36f0df6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:52:08 -0400 Subject: [PATCH 093/174] drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read() commit 4d75b9468021c73108b4439794d69e892b1d24e3 upstream. Avoid a possible buffer overflow if size is larger than 4K. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit f5d873f5825b40d886d03bd2aede91d4cf002434) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 3cca3f07f34d..a8dd63f270c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -419,7 +419,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user ssize_t result = 0; int r; - if (size & 0x3 || *pos & 0x3) + if (size > 4096 || size & 0x3 || *pos & 0x3) return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); From 1a9f55ed5b512f510ccd21ad527d532e60550e80 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 31 Oct 2024 16:28:48 +0100 Subject: [PATCH 094/174] drm/amdgpu: prevent NULL pointer dereference if ATIF is not supported commit a6dd15981c03f2cdc9a351a278f09b5479d53d2e upstream. acpi_evaluate_object() may return AE_NOT_FOUND (failure), which would result in dereferencing buffer.pointer (obj) while being NULL. Although this case may be unrealistic for the current code, it is still better to protect against possible bugs. Bail out also when status is AE_NOT_FOUND. This fixes 1 FORWARD_NULL issue reported by Coverity Report: CID 1600951: Null pointer dereferences (FORWARD_NULL) Signed-off-by: Antonio Quartulli Fixes: c9b7c809b89f ("drm/amd: Guard against bad data for ATIF ACPI method") Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241031152848.4716-1-antonio@mandelbit.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 91c9e221fe2553edf2db71627d8453f083de87a1) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index d967fba2e85e..7ad7ebaaa93c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -132,8 +132,8 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, &buffer); obj = (union acpi_object *)buffer.pointer; - /* Fail if calling the method fails and ATIF is supported */ - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* Fail if calling the method fails */ + if (ACPI_FAILURE(status)) { DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n", acpi_format_exception(status)); kfree(obj); From 66ada34425a3d7c420d632c938242c3c3b061da6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Oct 2024 08:48:39 +0300 Subject: [PATCH 095/174] thermal/drivers/qcom/lmh: Remove false lockdep backtrace commit f16beaaee248eaa37ad40b5905924fcf70ae02e3 upstream. Annotate LMH IRQs with lockdep classes so that the lockdep doesn't report possible recursive locking issue between LMH and GIC interrupts. For the reference: CPU0 ---- lock(&irq_desc_lock_class); lock(&irq_desc_lock_class); *** DEADLOCK *** Call trace: dump_backtrace+0x98/0xf0 show_stack+0x18/0x24 dump_stack_lvl+0x90/0xd0 dump_stack+0x18/0x24 print_deadlock_bug+0x258/0x348 __lock_acquire+0x1078/0x1f44 lock_acquire+0x1fc/0x32c _raw_spin_lock_irqsave+0x60/0x88 __irq_get_desc_lock+0x58/0x98 enable_irq+0x38/0xa0 lmh_enable_interrupt+0x2c/0x38 irq_enable+0x40/0x8c __irq_startup+0x78/0xa4 irq_startup+0x78/0x168 __enable_irq+0x70/0x7c enable_irq+0x4c/0xa0 qcom_cpufreq_ready+0x20/0x2c cpufreq_online+0x2a8/0x988 cpufreq_add_dev+0x80/0x98 subsys_interface_register+0x104/0x134 cpufreq_register_driver+0x150/0x234 qcom_cpufreq_hw_driver_probe+0x2a8/0x388 platform_probe+0x68/0xc0 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x20c/0x62c worker_thread+0x1bc/0x36c kthread+0x120/0x124 ret_from_fork+0x10/0x20 Fixes: 53bca371cdf7 ("thermal/drivers/qcom: Add support for LMh driver") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241011-lmh-lockdep-v1-1-495cbbe6fef1@linaro.org Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/qcom/lmh.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c index 97cf0dc3a6c3..1434ab8f6988 100644 --- a/drivers/thermal/qcom/lmh.c +++ b/drivers/thermal/qcom/lmh.c @@ -73,7 +73,14 @@ static struct irq_chip lmh_irq_chip = { static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct lmh_hw_data *lmh_data = d->host_data; + static struct lock_class_key lmh_lock_key; + static struct lock_class_key lmh_request_key; + /* + * This lock class tells lockdep that GPIO irqs are in a different + * category than their parents, so it won't report false recursion. + */ + irq_set_lockdep_class(irq, &lmh_lock_key, &lmh_request_key); irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq); irq_set_chip_data(irq, lmh_data); From ffaf0f6eab4617d6b0e3eb55208f679bf378f275 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:12:22 +0800 Subject: [PATCH 096/174] dm cache: correct the number of origin blocks to match the target length commit 235d2e739fcbe964c9ce179b4c991025662dcdb6 upstream. When creating a cache device, the actual size of the cache origin might be greater than the specified cache target length. In such case, the number of origin blocks should match the cache target length, not the full size of the origin device, since access beyond the cache target is not possible. This issue occurs when reducing the origin device size using lvm, as lvreduce preloads the new cache table before resuming the cache origin, which can result in incorrect sizes for the discard bitset and smq hotspot blocks. Reproduce steps: 1. create a cache device consists of 4096 origin blocks dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 2. reduce the cache origin to 2048 oblocks, in lvreduce's approach dmsetup reload corig --table "0 262144 linear /dev/sdc 262144" dmsetup reload cache --table "0 262144 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup suspend cache dmsetup suspend corig dmsetup suspend cdata dmsetup suspend cmeta dmsetup resume corig dmsetup resume cdata dmsetup resume cmeta dmsetup resume cache 3. shutdown the cache, and check the number of discard blocks in superblock. The value is expected to be 2048, but actually is 4096. dmsetup remove cache corig cdata cmeta dd if=/dev/sdc bs=1c count=8 skip=224 2>/dev/null | hexdump -e '1/8 "%u\n"' Fix by correcting the origin_blocks initialization in cache_create and removing the unused origin_sectors from struct cache_args accordingly. Signed-off-by: Ming-Hung Tsai Fixes: c6b4fcbad044 ("dm: add cache target") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 8f7426b71e02..ebfbbd1ec9a3 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1980,7 +1980,6 @@ struct cache_args { sector_t cache_sectors; struct dm_dev *origin_dev; - sector_t origin_sectors; uint32_t block_size; @@ -2061,6 +2060,7 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, char **error) { + sector_t origin_sectors; int r; if (!at_least_one_arg(as, error)) @@ -2073,8 +2073,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, return r; } - ca->origin_sectors = get_dev_size(ca->origin_dev); - if (ca->ti->len > ca->origin_sectors) { + origin_sectors = get_dev_size(ca->origin_dev); + if (ca->ti->len > origin_sectors) { *error = "Device size larger than cached device"; return -EINVAL; } @@ -2384,7 +2384,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; - origin_blocks = cache->origin_sectors = ca->origin_sectors; + origin_blocks = cache->origin_sectors = ti->len; origin_blocks = block_div(origin_blocks, ca->block_size); cache->origin_blocks = to_oblock(origin_blocks); From 5a754d3c771280f2d06bf8ab716d6a0d36ca256e Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:12:49 +0800 Subject: [PATCH 097/174] dm cache: fix flushing uninitialized delayed_work on cache_ctr error commit 135496c208ba26fd68cdef10b64ed7a91ac9a7ff upstream. An unexpected WARN_ON from flush_work() may occur when cache creation fails, caused by destroying the uninitialized delayed_work waker in the error path of cache_create(). For example, the warning appears on the superblock checksum error. Reproduce steps: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/urandom of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" Kernel logs: (snip) WARNING: CPU: 0 PID: 84 at kernel/workqueue.c:4178 __flush_work+0x5d4/0x890 Fix by pulling out the cancel_delayed_work_sync() from the constructor's error path. This patch doesn't affect the use-after-free fix for concurrent dm_resume and dm_destroy (commit 6a459d8edbdb ("dm cache: Fix UAF in destroy()")) as cache_dtr is not changed. Signed-off-by: Ming-Hung Tsai Fixes: 6a459d8edbdb ("dm cache: Fix UAF in destroy()") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ebfbbd1ec9a3..c774ede6f925 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1882,16 +1882,13 @@ static void check_migrations(struct work_struct *ws) * This function gets called on the error paths of the constructor, so we * have to cope with a partially initialised struct. */ -static void destroy(struct cache *cache) +static void __destroy(struct cache *cache) { - unsigned int i; - mempool_exit(&cache->migration_pool); if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); - cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); @@ -1919,13 +1916,22 @@ static void destroy(struct cache *cache) if (cache->policy) dm_cache_policy_destroy(cache->policy); + bioset_exit(&cache->bs); + + kfree(cache); +} + +static void destroy(struct cache *cache) +{ + unsigned int i; + + cancel_delayed_work_sync(&cache->waker); + for (i = 0; i < cache->nr_ctr_args ; i++) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); - bioset_exit(&cache->bs); - - kfree(cache); + __destroy(cache); } static void cache_dtr(struct dm_target *ti) @@ -2538,7 +2544,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) *result = cache; return 0; bad: - destroy(cache); + __destroy(cache); return r; } @@ -2589,7 +2595,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv) r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); if (r) { - destroy(cache); + __destroy(cache); goto out; } From 56507203e1b6127967ec2b51fb0b23a0d4af1334 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:16 +0800 Subject: [PATCH 098/174] dm cache: fix out-of-bounds access to the dirty bitset when resizing commit 792227719725497ce10a8039803bec13f89f8910 upstream. dm-cache checks the dirty bits of the cache blocks to be dropped when shrinking the fast device, but an index bug in bitset iteration causes out-of-bounds access. Reproduce steps: 1. create a cache device of 1024 cache blocks (128 bytes dirty bitset) dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 131072 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 2. shrink the fast device to 512 cache blocks, triggering out-of-bounds access to the dirty bitset (offset 0x80) dmsetup suspend cache dmsetup reload cdata --table "0 65536 linear /dev/sdc 8192" dmsetup resume cdata dmsetup resume cache KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in cache_preresume+0x269/0x7b0 Read of size 8 at addr ffffc900000f3080 by task dmsetup/131 (...snip...) The buggy address belongs to the virtual mapping at [ffffc900000f3000, ffffc900000f5000) created by: cache_ctr+0x176a/0x35f0 (...snip...) Memory state around the buggy address: ffffc900000f2f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc900000f3000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffffc900000f3080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffffc900000f3100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc900000f3180: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 Fix by making the index post-incremented. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c774ede6f925..59908dbd4c18 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2889,13 +2889,13 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) * We can't drop a dirty block when shrinking the cache. */ while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - new_size = to_cblock(from_cblock(new_size) + 1); if (is_dirty(cache, new_size)) { DMERR("%s: unable to shrink cache; cache block %llu is dirty", cache_device_name(cache), (unsigned long long) from_cblock(new_size)); return false; } + new_size = to_cblock(from_cblock(new_size) + 1); } return true; From 011450c2f96ca4f6f642778d5d67ca42b370e7e2 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:39 +0800 Subject: [PATCH 099/174] dm cache: optimize dirty bit checking with find_next_bit when resizing commit f484697e619a83ecc370443a34746379ad99d204 upstream. When shrinking the fast device, dm-cache iteratively searches for a dirty bit among the cache blocks to be dropped, which is less efficient. Use find_next_bit instead, as it is twice as fast as the iterative approach with test_bit. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 59908dbd4c18..f852e27231f0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2888,14 +2888,14 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) /* * We can't drop a dirty block when shrinking the cache. */ - while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - if (is_dirty(cache, new_size)) { - DMERR("%s: unable to shrink cache; cache block %llu is dirty", - cache_device_name(cache), - (unsigned long long) from_cblock(new_size)); - return false; - } - new_size = to_cblock(from_cblock(new_size) + 1); + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), + (unsigned long long) from_cblock(new_size)); + return false; } return true; From c52ec00cb2f9bebfada22edcc0db385b910a1cdb Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:54 +0800 Subject: [PATCH 100/174] dm cache: fix potential out-of-bounds access on the first resume commit c0ade5d98979585d4f5a93e4514c2e9a65afa08d upstream. Out-of-bounds access occurs if the fast device is expanded unexpectedly before the first-time resume of the cache table. This happens because expanding the fast device requires reloading the cache table for cache_create to allocate new in-core data structures that fit the new size, and the check in cache_preresume is not performed during the first resume, leading to the issue. Reproduce steps: 1. prepare component devices: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct 2. load a cache table of 512 cache blocks, and deliberately expand the fast device before resuming the cache, making the in-core data structures inadequate. dmsetup create cache --notable dmsetup reload cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup reload cdata --table "0 131072 linear /dev/sdc 8192" dmsetup resume cdata dmsetup resume cache 3. suspend the cache to write out the in-core dirty bitset and hint array, leading to out-of-bounds access to the dirty bitset at offset 0x40: dmsetup suspend cache KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in is_dirty_callback+0x2b/0x80 Read of size 8 at addr ffffc90000085040 by task dmsetup/90 (...snip...) The buggy address belongs to the virtual mapping at [ffffc90000085000, ffffc90000087000) created by: cache_ctr+0x176a/0x35f0 (...snip...) Memory state around the buggy address: ffffc90000084f00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc90000084f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 >ffffc90000085000: 00 00 00 00 00 00 00 00 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffffc90000085080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc90000085100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 Fix by checking the size change on the first resume. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 37 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index f852e27231f0..e714114d495a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2878,24 +2878,24 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { if (from_cblock(new_size) > from_cblock(cache->cache_size)) { - if (cache->sized) { - DMERR("%s: unable to extend cache due to missing cache table reload", - cache_device_name(cache)); - return false; - } + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; } /* * We can't drop a dirty block when shrinking the cache. */ - new_size = to_cblock(find_next_bit(cache->dirty_bitset, - from_cblock(cache->cache_size), - from_cblock(new_size))); - if (new_size != cache->cache_size) { - DMERR("%s: unable to shrink cache; cache block %llu is dirty", - cache_device_name(cache), - (unsigned long long) from_cblock(new_size)); - return false; + if (cache->loaded_mappings) { + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), + (unsigned long long) from_cblock(new_size)); + return false; + } } return true; @@ -2926,20 +2926,15 @@ static int cache_preresume(struct dm_target *ti) /* * Check to see if the cache has resized. */ - if (!cache->sized) { - r = resize_cache_dev(cache, csize); - if (r) - return r; - - cache->sized = true; - - } else if (csize != cache->cache_size) { + if (!cache->sized || csize != cache->cache_size) { if (!can_resize(cache, csize)) return -EINVAL; r = resize_cache_dev(cache, csize); if (r) return r; + + cache->sized = true; } if (!cache->loaded_mappings) { From 80342c5876cd55dd1b755d62d728c2267eca948b Mon Sep 17 00:00:00 2001 From: Zichen Xie Date: Mon, 21 Oct 2024 14:54:45 -0500 Subject: [PATCH 101/174] dm-unstriped: cast an operand to sector_t to prevent potential uint32_t overflow commit 5a4510c762fc04c74cff264cd4d9e9f5bf364bae upstream. This was found by a static analyzer. There may be a potential integer overflow issue in unstripe_ctr(). uc->unstripe_offset and uc->unstripe_width are defined as "sector_t"(uint64_t), while uc->unstripe, uc->chunk_size and uc->stripes are all defined as "uint32_t". The result of the calculation will be limited to "uint32_t" without correct casting. So, we recommend adding an extra cast to prevent potential integer overflow. Fixes: 18a5bf270532 ("dm: add unstriped target") Signed-off-by: Zichen Xie Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-unstripe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index fdc8921e5c19..e69d297b9122 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -84,8 +84,8 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } uc->physical_start = start; - uc->unstripe_offset = uc->unstripe * uc->chunk_size; - uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size; + uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size; + uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size; uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0; tmp_len = ti->len; From 4fee0ad11a1be8824779148ccd4bb2d1c6e58afc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Nov 2024 13:02:17 +0100 Subject: [PATCH 102/174] ALSA: usb-audio: Add quirk for HP 320 FHD Webcam commit dabc44c28f118910dea96244d903f0c270225669 upstream. HP 320 FHD Webcam (03f0:654a) seems to have flaky firmware like other webcam devices that don't like the frequency inquiries. Also, Mic Capture Volume has an invalid resolution, hence fix it to be 16 (as a blind shot). Link: https://bugzilla.suse.com/show_bug.cgi?id=1232768 Cc: Link: https://patch.msgid.link/20241105120220.5740-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 1 + sound/usb/quirks.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 49facdf35b15..102a9b3ba3be 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1205,6 +1205,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; case USB_ID(0x1bcf, 0x2283): /* NexiGo N930AF FHD Webcam */ + case USB_ID(0x03f0, 0x654a): /* HP 320 FHD Webcam */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, "set resolution quirk: cval->res = 16\n"); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 86b8c6fa9a5d..e96f5361e762 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2014,6 +2014,8 @@ struct usb_audio_quirk_flags_table { static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { /* Device matches */ + DEVICE_FLG(0x03f0, 0x654a, /* HP 320 FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x041e, 0x3000, /* Creative SB Extigy */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x041e, 0x4080, /* Creative Live Cam VF0610 */ From 1ced986a37988b0080508617e468ae0739000ffc Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Tue, 29 Oct 2024 16:16:52 +0100 Subject: [PATCH 103/174] ALSA: hda/realtek: Fix headset mic on TUXEDO Gemini 17 Gen3 commit 0b04fbe886b4274c8e5855011233aaa69fec6e75 upstream. Quirk is needed to enable headset microphone on missing pin 0x19. Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: Link: https://patch.msgid.link/20241029151653.80726-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d750c6e6eb98..1b5527cb59a9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10015,6 +10015,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x1403, "Clevo N140CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From d3bcf4069df3434c810ea3888c7b26a3775346d3 Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Fri, 25 Oct 2024 18:35:35 -0700 Subject: [PATCH 104/174] posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone [ Upstream commit b5413156bad91dc2995a5c4eab1b05e56914638a ] When cloning a new thread, its posix_cputimers are not inherited, and are cleared by posix_cputimers_init(). However, this does not clear the tick dependency it creates in tsk->tick_dep_mask, and the handler does not reach the code to clear the dependency if there were no timers to begin with. Thus if a thread has a cputimer running before clone/fork, all descendants will prevent nohz_full unless they create a cputimer of their own. Fix this by entirely clearing the tick_dep_mask in copy_process(). (There is currently no inherited state that needs a tick dependency) Process-wide timers do not have this problem because fork does not copy signal_struct as a baseline, it creates one from scratch. Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model") Signed-off-by: Ben Segall Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com Signed-off-by: Sasha Levin --- include/linux/tick.h | 8 ++++++++ kernel/fork.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..9701c571a5cf 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -252,12 +252,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk, if (tick_nohz_full_enabled()) tick_nohz_dep_set_task(tsk, bit); } + static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_clear_task(tsk, bit); } + +static inline void tick_dep_init_task(struct task_struct *tsk) +{ + atomic_set(&tsk->tick_dep_mask, 0); +} + static inline void tick_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { @@ -291,6 +298,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { } +static inline void tick_dep_init_task(struct task_struct *tsk) { } static inline void tick_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_clear_signal(struct signal_struct *signal, diff --git a/kernel/fork.c b/kernel/fork.c index 8dd46baee4c3..09a935724bd9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -2183,6 +2184,7 @@ static __latent_entropy struct task_struct *copy_process( acct_clear_integrals(p); posix_cputimers_init(&p->posix_cputimers); + tick_dep_init_task(p); p->io_context = NULL; audit_set_context(p, NULL); From f6b2b2b981af8e7d7c62d34143acefa4e1edfe8b Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Fri, 25 Oct 2024 16:03:27 +0200 Subject: [PATCH 105/174] nfs: Fix KMSAN warning in decode_getfattr_attrs() commit dc270d7159699ad6d11decadfce9633f0f71c1db upstream. Fix the following KMSAN warning: CPU: 1 UID: 0 PID: 7651 Comm: cp Tainted: G B Tainted: [B]=BAD_PAGE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009) ===================================================== ===================================================== BUG: KMSAN: uninit-value in decode_getfattr_attrs+0x2d6d/0x2f90 decode_getfattr_attrs+0x2d6d/0x2f90 decode_getfattr_generic+0x806/0xb00 nfs4_xdr_dec_getattr+0x1de/0x240 rpcauth_unwrap_resp_decode+0xab/0x100 rpcauth_unwrap_resp+0x95/0xc0 call_decode+0x4ff/0xb50 __rpc_execute+0x57b/0x19d0 rpc_execute+0x368/0x5e0 rpc_run_task+0xcfe/0xee0 nfs4_proc_getattr+0x5b5/0x990 __nfs_revalidate_inode+0x477/0xd00 nfs_access_get_cached+0x1021/0x1cc0 nfs_do_access+0x9f/0xae0 nfs_permission+0x1e4/0x8c0 inode_permission+0x356/0x6c0 link_path_walk+0x958/0x1330 path_lookupat+0xce/0x6b0 filename_lookup+0x23e/0x770 vfs_statx+0xe7/0x970 vfs_fstatat+0x1f2/0x2c0 __se_sys_newfstatat+0x67/0x880 __x64_sys_newfstatat+0xbd/0x120 x64_sys_call+0x1826/0x3cf0 do_syscall_64+0xd0/0x1b0 entry_SYSCALL_64_after_hwframe+0x77/0x7f The KMSAN warning is triggered in decode_getfattr_attrs(), when calling decode_attr_mdsthreshold(). It appears that fattr->mdsthreshold is not initialized. Fix the issue by initializing fattr->mdsthreshold to NULL in nfs_fattr_init(). Cc: stable@vger.kernel.org # v3.5.x Fixes: 88034c3d88c2 ("NFSv4.1 mdsthreshold attribute xdr") Signed-off-by: Roberto Sassu Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 719448d81aed..964df0725f4c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1566,6 +1566,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; + fattr->mdsthreshold = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); From 424c4acb3376f21d1c590304ab4695b5554746f4 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 1 Nov 2024 10:53:16 +0800 Subject: [PATCH 106/174] net: wwan: t7xx: Fix off-by-one error in t7xx_dpmaif_rx_buf_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b557be89fc688dbd9ccf704a70f7600a094f13a upstream. The error path in t7xx_dpmaif_rx_buf_alloc(), free and unmap the already allocated and mapped skb in a loop, but the loop condition terminates when the index reaches zero, which fails to free the first allocated skb at index zero. Check with i-- so that skb at index 0 is freed as well. Cc: stable@vger.kernel.org Fixes: d642b012df70 ("net: wwan: t7xx: Add data path interface") Acked-by: Sergey Ryazanov Signed-off-by: Jinjie Ruan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20241101025316.3234023-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c index 91a0eb19e0d8..f4f924d75103 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c @@ -262,7 +262,7 @@ int t7xx_dpmaif_rx_buf_alloc(struct dpmaif_ctrl *dpmaif_ctrl, return 0; err_unmap_skbs: - while (--i > 0) + while (i--) t7xx_unmap_bat_skb(dpmaif_ctrl->dev, bat_req->bat_skb, i); return ret; From 2cf0e77f5a0aa1ff336aa71743eda55c73902187 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 5 Nov 2024 17:31:01 +0100 Subject: [PATCH 107/174] net: vertexcom: mse102x: Fix possible double free of TX skb commit 1f26339b2ed63d1e8e18a18674fb73a392f3660e upstream. The scope of the TX skb is wider than just mse102x_tx_frame_spi(), so in case the TX skb room needs to be expanded, we should free the the temporary skb instead of the original skb. Otherwise the original TX skb pointer would be freed again in mse102x_tx_work(), which leads to crashes: Internal error: Oops: 0000000096000004 [#2] PREEMPT SMP CPU: 0 PID: 712 Comm: kworker/0:1 Tainted: G D 6.6.23 Hardware name: chargebyte Charge SOM DC-ONE (DT) Workqueue: events mse102x_tx_work [mse102x] pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_release_data+0xb8/0x1d8 lr : skb_release_data+0x1ac/0x1d8 sp : ffff8000819a3cc0 x29: ffff8000819a3cc0 x28: ffff0000046daa60 x27: ffff0000057f2dc0 x26: ffff000005386c00 x25: 0000000000000002 x24: 00000000ffffffff x23: 0000000000000000 x22: 0000000000000001 x21: ffff0000057f2e50 x20: 0000000000000006 x19: 0000000000000000 x18: ffff00003fdacfcc x17: e69ad452d0c49def x16: 84a005feff870102 x15: 0000000000000000 x14: 000000000000024a x13: 0000000000000002 x12: 0000000000000000 x11: 0000000000000400 x10: 0000000000000930 x9 : ffff00003fd913e8 x8 : fffffc00001bc008 x7 : 0000000000000000 x6 : 0000000000000008 x5 : ffff00003fd91340 x4 : 0000000000000000 x3 : 0000000000000009 x2 : 00000000fffffffe x1 : 0000000000000000 x0 : 0000000000000000 Call trace: skb_release_data+0xb8/0x1d8 kfree_skb_reason+0x48/0xb0 mse102x_tx_work+0x164/0x35c [mse102x] process_one_work+0x138/0x260 worker_thread+0x32c/0x438 kthread+0x118/0x11c ret_from_fork+0x10/0x20 Code: aa1303e0 97fffab6 72001c1f 54000141 (f9400660) Cc: stable@vger.kernel.org Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20241105163101.33216-1-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/vertexcom/mse102x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index aeed2a093e34..dd766e175f7d 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -222,7 +222,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; struct spi_message *msg = &mses->spi_msg; - struct sk_buff *tskb; + struct sk_buff *tskb = NULL; int ret; netif_dbg(mse, tx_queued, mse->ndev, "%s: skb %p, %d@%p\n", @@ -235,7 +235,6 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, if (!tskb) return -ENOMEM; - dev_kfree_skb(txp); txp = tskb; } @@ -257,6 +256,8 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, mse->stats.xfer_err++; } + dev_kfree_skb(tskb); + return ret; } From aa3e68bd4589256eb5a6e97c909afc6dc570c96b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 4 Nov 2024 13:31:42 +0100 Subject: [PATCH 108/174] mptcp: use sock_kfree_s instead of kfree commit 99635c91fb8b860a6404b9bc8b769df7bdaa2ae3 upstream. The local address entries on userspace_pm_local_addr_list are allocated by sock_kmalloc(). It's then required to use sock_kfree_s() instead of kfree() to free these entries in order to adjust the allocated size on the sk side. Fixes: 24430f8bf516 ("mptcp: add address into userspace pm list") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241104-net-mptcp-misc-6-12-v1-2-c13f2ff1656f@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index f2b90053ecae..748e3876ec6d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -89,6 +89,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *addr) { struct mptcp_pm_addr_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { @@ -96,7 +97,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, * be used multiple times (e.g. fullmesh mode). */ list_del_rcu(&entry->list); - kfree(entry); + sock_kfree_s(sk, entry, sizeof(*entry)); msk->pm.local_addr_used--; return 0; } From ba884534f1e97571db082da9878fdeefdcb2a01c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 6 Nov 2024 16:42:20 +0000 Subject: [PATCH 109/174] arm64: Kconfig: Make SME depend on BROKEN for now commit 81235ae0c846e1fb46a2c6fe9283fe2b2b24f7dc upstream. Although support for SME was merged in v5.19, we've since uncovered a number of issues with the implementation, including issues which might corrupt the FPSIMD/SVE/SME state of arbitrary tasks. While there are patches to address some of these issues, ongoing review has highlighted additional functional problems, and more time is necessary to analyse and fix these. For now, mark SME as BROKEN in the hope that we can fix things properly in the near future. As SME is an OPTIONAL part of ARMv9.2+, and there is very little extant hardware, this should not adversely affect the vast majority of users. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Cc: stable@vger.kernel.org # 5.19 Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20241106164220.2789279-1-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1a62ef142a98..57b437ed0974 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2113,6 +2113,7 @@ config ARM64_SME bool "ARM Scalable Matrix Extension support" default y depends on ARM64_SVE + depends on BROKEN help The Scalable Matrix Extension (SME) is an extension to the AArch64 execution state which utilises a substantial subset of the SVE From 2cb1a73d1d44a1c11b0ee5eeced765dd80ec48e6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Nov 2024 12:11:15 +0000 Subject: [PATCH 110/174] btrfs: reinitialize delayed ref list after deleting it from the list commit c9a75ec45f1111ef530ab186c2a7684d0a0c9245 upstream. At insert_delayed_ref() if we need to update the action of an existing ref to BTRFS_DROP_DELAYED_REF, we delete the ref from its ref head's ref_add_list using list_del(), which leaves the ref's add_list member not reinitialized, as list_del() sets the next and prev members of the list to LIST_POISON1 and LIST_POISON2, respectively. If later we end up calling drop_delayed_ref() against the ref, which can happen during merging or when destroying delayed refs due to a transaction abort, we can trigger a crash since at drop_delayed_ref() we call list_empty() against the ref's add_list, which returns false since the list was not reinitialized after the list_del() and as a consequence we call list_del() again at drop_delayed_ref(). This results in an invalid list access since the next and prev members are set to poison pointers, resulting in a splat if CONFIG_LIST_HARDENED and CONFIG_DEBUG_LIST are set or invalid poison pointer dereferences otherwise. So fix this by deleting from the list with list_del_init() instead. Fixes: 1d57ee941692 ("btrfs: improve delayed refs iterations") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e08e3852c478..99a78ca28db2 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -622,7 +622,7 @@ static int insert_delayed_ref(struct btrfs_trans_handle *trans, &href->ref_add_list); else if (ref->action == BTRFS_DROP_DELAYED_REF) { ASSERT(!list_empty(&exist->add_list)); - list_del(&exist->add_list); + list_del_init(&exist->add_list); } else { ASSERT(0); } From 10ffafb456f293976c42f700578ef740467cb569 Mon Sep 17 00:00:00 2001 From: Daniel Maslowski Date: Fri, 19 Jul 2024 19:04:37 +0200 Subject: [PATCH 111/174] riscv/purgatory: align riscv_kernel_entry commit fb197c5d2fd24b9af3d4697d0cf778645846d6d5 upstream. When alignment handling is delegated to the kernel, everything must be word-aligned in purgatory, since the trap handler is then set to the kexec one. Without the alignment, hitting the exception would ultimately crash. On other occasions, the kernel's handler would take care of exceptions. This has been tested on a JH7110 SoC with oreboot and its SBI delegating unaligned access exceptions and the kernel configured to handle them. Fixes: 736e30af583fb ("RISC-V: Add purgatory") Signed-off-by: Daniel Maslowski Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240719170437.247457-1-cyrevolt@gmail.com Signed-off-by: Palmer Dabbelt Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- arch/riscv/purgatory/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0194f4554130..a4ede42bc151 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -11,6 +11,8 @@ .macro size, sym:req .size \sym, . - \sym .endm +#include +#include .text @@ -39,6 +41,7 @@ size purgatory_start .data +.align LGREG .globl riscv_kernel_entry riscv_kernel_entry: .quad 0 From 84d2f29152184f0d72ed7c9648c4ee6927df4e59 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 7 May 2024 12:39:28 +0200 Subject: [PATCH 112/174] bnxt_re: avoid shift undefined behavior in bnxt_qplib_alloc_init_hwq commit 78cfd17142ef70599d6409cbd709d94b3da58659 upstream. Undefined behavior is triggered when bnxt_qplib_alloc_init_hwq is called with hwq_attr->aux_depth != 0 and hwq_attr->aux_stride == 0. In that case, "roundup_pow_of_two(hwq_attr->aux_stride)" gets called. roundup_pow_of_two is documented as undefined for 0. Fix it in the one caller that had this combination. The undefined behavior was detected by UBSAN: UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 24 PID: 1075 Comm: (udev-worker) Not tainted 6.9.0-rc6+ #4 Hardware name: Abacus electric, s.r.o. - servis@abacus.cz Super Server/H12SSW-iN, BIOS 2.7 10/25/2023 Call Trace: dump_stack_lvl+0x5d/0x80 ubsan_epilogue+0x5/0x30 __ubsan_handle_shift_out_of_bounds.cold+0x61/0xec __roundup_pow_of_two+0x25/0x35 [bnxt_re] bnxt_qplib_alloc_init_hwq+0xa1/0x470 [bnxt_re] bnxt_qplib_create_qp+0x19e/0x840 [bnxt_re] bnxt_re_create_qp+0x9b1/0xcd0 [bnxt_re] ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __kmalloc+0x1b6/0x4f0 ? create_qp.part.0+0x128/0x1c0 [ib_core] ? __pfx_bnxt_re_create_qp+0x10/0x10 [bnxt_re] create_qp.part.0+0x128/0x1c0 [ib_core] ib_create_qp_kernel+0x50/0xd0 [ib_core] create_mad_qp+0x8e/0xe0 [ib_core] ? __pfx_qp_event_handler+0x10/0x10 [ib_core] ib_mad_init_device+0x2be/0x680 [ib_core] add_client_context+0x10d/0x1a0 [ib_core] enable_device_and_get+0xe0/0x1d0 [ib_core] ib_register_device+0x53c/0x630 [ib_core] ? srso_alias_return_thunk+0x5/0xfbef5 bnxt_re_probe+0xbd8/0xe50 [bnxt_re] ? __pfx_bnxt_re_probe+0x10/0x10 [bnxt_re] auxiliary_bus_probe+0x49/0x80 ? driver_sysfs_add+0x57/0xc0 really_probe+0xde/0x340 ? pm_runtime_barrier+0x54/0x90 ? __pfx___driver_attach+0x10/0x10 __driver_probe_device+0x78/0x110 driver_probe_device+0x1f/0xa0 __driver_attach+0xba/0x1c0 bus_for_each_dev+0x8f/0xe0 bus_add_driver+0x146/0x220 driver_register+0x72/0xd0 __auxiliary_driver_register+0x6e/0xd0 ? __pfx_bnxt_re_mod_init+0x10/0x10 [bnxt_re] bnxt_re_mod_init+0x3e/0xff0 [bnxt_re] ? __pfx_bnxt_re_mod_init+0x10/0x10 [bnxt_re] do_one_initcall+0x5b/0x310 do_init_module+0x90/0x250 init_module_from_file+0x86/0xc0 idempotent_init_module+0x121/0x2b0 __x64_sys_finit_module+0x5e/0xb0 do_syscall_64+0x82/0x160 ? srso_alias_return_thunk+0x5/0xfbef5 ? syscall_exit_to_user_mode_prepare+0x149/0x170 ? srso_alias_return_thunk+0x5/0xfbef5 ? syscall_exit_to_user_mode+0x75/0x230 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_syscall_64+0x8e/0x160 ? srso_alias_return_thunk+0x5/0xfbef5 ? __count_memcg_events+0x69/0x100 ? srso_alias_return_thunk+0x5/0xfbef5 ? count_memcg_events.constprop.0+0x1a/0x30 ? srso_alias_return_thunk+0x5/0xfbef5 ? handle_mm_fault+0x1f0/0x300 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_user_addr_fault+0x34e/0x640 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f4e5132821d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e3 db 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffca9c906a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 0000563ec8a8f130 RCX: 00007f4e5132821d RDX: 0000000000000000 RSI: 00007f4e518fa07d RDI: 000000000000003b RBP: 00007ffca9c90760 R08: 00007f4e513f6b20 R09: 00007ffca9c906f0 R10: 0000563ec8a8faa0 R11: 0000000000000246 R12: 00007f4e518fa07d R13: 0000000000020000 R14: 0000563ec8409e90 R15: 0000563ec8a8fa60 ---[ end trace ]--- Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Signed-off-by: Michal Schmidt Link: https://lore.kernel.org/r/20240507103929.30003-1-mschmidt@redhat.com Acked-by: Selvin Xavier Signed-off-by: Leon Romanovsky Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 3a5c58694e07..7bd7ac8d52e6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1014,7 +1014,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.stride = sizeof(struct sq_sge); hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; - hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); + hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode) + : 0; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) From 5cf45281a6ee768b77057cfad448569132e056cb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 10 Nov 2024 06:02:40 +0100 Subject: [PATCH 113/174] Revert "wifi: mac80211: fix RCU list iterations" This reverts commit b0b2dc1eaa7ec509e07a78c9974097168ae565b7 which is commit ac35180032fbc5d80b29af00ba4881815ceefcb6 upstream. It should not have been backported here due to lack of other rcu changes in the stable branches. Cc: Johannes Berg Cc: Miriam Rachel Korenblit Signed-off-by: Greg Kroah-Hartman --- net/mac80211/chan.c | 4 +--- net/mac80211/mlme.c | 2 +- net/mac80211/scan.c | 2 +- net/mac80211/util.c | 4 +--- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 807bea1a7d3c..f07e34bed8f3 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -245,9 +245,7 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata, enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; struct sta_info *sta; - lockdep_assert_wiphy(sdata->local->hw.wiphy); - - list_for_each_entry(sta, &sdata->local->sta_list, list) { + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (sdata != sta->sdata && !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) continue; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ee9ec74b9553..9a5530ca2f6b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -660,7 +660,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, bool disable_mu_mimo = false; struct ieee80211_sub_if_data *other; - list_for_each_entry(other, &local->interfaces, list) { + list_for_each_entry_rcu(other, &local->interfaces, list) { if (other->vif.bss_conf.mu_mimo_owner) { disable_mu_mimo = true; break; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index edbf468e0bea..f1147d156c1f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -501,7 +501,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) * the scan was in progress; if there was none this will * just be a no-op for the particular interface. */ - list_for_each_entry(sdata, &local->interfaces, list) { + list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (ieee80211_sdata_running(sdata)) ieee80211_queue_work(&sdata->local->hw, &sdata->work); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3fe15089b24f..738f1f139a90 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -767,9 +767,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; bool active_only = iter_flags & IEEE80211_IFACE_ITER_ACTIVE; - list_for_each_entry_rcu(sdata, &local->interfaces, list, - lockdep_is_held(&local->iflist_mtx) || - lockdep_is_held(&local->hw.wiphy->mtx)) { + list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) From a60db84f772fc3a906c6c4072f9207579c41166f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Oct 2024 14:31:10 +0000 Subject: [PATCH 114/174] net: do not delay dst_entries_add() in dst_release() commit ac888d58869bb99753e7652be19a151df9ecb35d upstream. dst_entries_add() uses per-cpu data that might be freed at netns dismantle from ip6_route_net_exit() calling dst_entries_destroy() Before ip6_route_net_exit() can be called, we release all the dsts associated with this netns, via calls to dst_release(), which waits an rcu grace period before calling dst_destroy() dst_entries_add() use in dst_destroy() is racy, because dst_entries_destroy() could have been called already. Decrementing the number of dsts must happen sooner. Notes: 1) in CONFIG_XFRM case, dst_destroy() can call dst_release_immediate(child), this might also cause UAF if the child does not have DST_NOCOUNT set. IPSEC maintainers might take a look and see how to address this. 2) There is also discussion about removing this count of dst, which might happen in future kernels. Fixes: f88649721268 ("ipv4: fix dst race in sk_dst_get()") Closes: https://lore.kernel.org/lkml/CANn89iLCCGsP7SFn9HKpvnKu96Td4KD08xf7aGtiYgZnkjaL=w@mail.gmail.com/T/ Reported-by: Naresh Kamboju Tested-by: Linux Kernel Functional Testing Tested-by: Naresh Kamboju Signed-off-by: Eric Dumazet Cc: Xin Long Cc: Steffen Klassert Reviewed-by: Xin Long Link: https://patch.msgid.link/20241008143110.1064899-1-edumazet@google.com Signed-off-by: Paolo Abeni [ resolved conflict due to bc9d3a9f2afc ("net: dst: Switch to rcuref_t reference counting") is not in the tree ] Signed-off-by: Abdelkareem Abdelsaamad Signed-off-by: Greg Kroah-Hartman --- net/core/dst.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/core/dst.c b/net/core/dst.c index d178c564138e..8db87258d145 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -108,9 +108,6 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) child = xdst->child; } #endif - if (!(dst->flags & DST_NOCOUNT)) - dst_entries_add(dst->ops, -1); - if (dst->ops->destroy) dst->ops->destroy(dst); netdev_put(dst->dev, &dst->dev_tracker); @@ -160,6 +157,12 @@ void dst_dev_put(struct dst_entry *dst) } EXPORT_SYMBOL(dst_dev_put); +static void dst_count_dec(struct dst_entry *dst) +{ + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -169,8 +172,10 @@ void dst_release(struct dst_entry *dst) if (WARN_ONCE(newrefcnt < 0, "dst_release underflow")) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (!newrefcnt) + if (!newrefcnt){ + dst_count_dec(dst); call_rcu(&dst->rcu_head, dst_destroy_rcu); + } } } EXPORT_SYMBOL(dst_release); @@ -184,8 +189,10 @@ void dst_release_immediate(struct dst_entry *dst) if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow")) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (!newrefcnt) + if (!newrefcnt){ + dst_count_dec(dst); dst_destroy(dst); + } } } EXPORT_SYMBOL(dst_release_immediate); From c8ec4e437aaa4b9856162ae64dd60f06988393d2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 12 Jan 2023 19:51:50 +0000 Subject: [PATCH 115/174] kselftest/arm64: Initialise current at build time in signal tests commit 6e4b4f0eca88e47def703f90a403fef5b96730d5 upstream. When building with clang the toolchain refuses to link the signals testcases since the assembly code has a reference to current which has no initialiser so is placed in the BSS: /tmp/signals-af2042.o: in function `fake_sigreturn': :51:(.text+0x40): relocation truncated to fit: R_AARCH64_LD_PREL_LO19 against symbol `current' defined in .bss section in /tmp/test_signals-ec1160.o Since the first statement in main() initialises current we may as well fix this by moving the initialisation to build time so the variable doesn't end up in the BSS. Signed-off-by: Mark Brown Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230111-arm64-kselftest-clang-v1-4-89c69d377727@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Mahmoud Adam Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/arm64/signal/test_signals.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index 416b1ff43199..00051b40d71e 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -12,12 +12,10 @@ #include "test_signals.h" #include "test_signals_utils.h" -struct tdescr *current; +struct tdescr *current = &tde; int main(int argc, char *argv[]) { - current = &tde; - ksft_print_msg("%s :: %s\n", current->name, current->descr); if (test_setup(current) && test_init(current)) { test_run(current); From beced2cb09b58c1243733f374c560a55382003d6 Mon Sep 17 00:00:00 2001 From: Benoit Sevens Date: Thu, 7 Nov 2024 14:22:02 +0000 Subject: [PATCH 116/174] media: uvcvideo: Skip parsing frames of type UVC_VS_UNDEFINED in uvc_parse_format commit ecf2b43018da9579842c774b7f35dbe11b5c38dd upstream. This can lead to out of bounds writes since frames of this type were not taken into account when calculating the size of the frames buffer in uvc_parse_streaming. Fixes: c0efd232929c ("V4L/DVB (8145a): USB Video Class driver") Signed-off-by: Benoit Sevens Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 004511d918c6..9b87a13a92fe 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -368,7 +368,7 @@ static int uvc_parse_format(struct uvc_device *dev, * Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. */ - while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && + while (ftype && buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { frame = &format->frame[format->nframes]; if (ftype != UVC_VS_FRAME_FRAME_BASED) From 26530b757c81f1389fb33ae0357500150933161b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Sep 2024 13:57:04 -0400 Subject: [PATCH 117/174] filemap: Fix bounds checking in filemap_read() commit ace149e0830c380ddfce7e466fe860ca502fe4ee upstream. If the caller supplies an iocb->ki_pos value that is close to the filesystem upper limit, and an iterator with a count that causes us to overflow that limit, then filemap_read() enters an infinite loop. This behaviour was discovered when testing xfstests generic/525 with the "localio" optimisation for loopback NFS mounts. Reported-by: Mike Snitzer Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Tested-by: Mike Snitzer Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index d3b925232a59..dc23b1336a8b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2738,7 +2738,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(!iov_iter_count(iter))) return 0; - iov_iter_truncate(iter, inode->i_sb->s_maxbytes); + iov_iter_truncate(iter, inode->i_sb->s_maxbytes - iocb->ki_pos); folio_batch_init(&fbatch); do { From 190911cebd1d725af056e147d0d3818b4f9a6aa7 Mon Sep 17 00:00:00 2001 From: Qi Xi Date: Fri, 1 Nov 2024 11:48:03 +0800 Subject: [PATCH 118/174] fs/proc: fix compile warning about variable 'vmcore_mmap_ops' commit b8ee299855f08539e04d6c1a6acb3dc9e5423c00 upstream. When build with !CONFIG_MMU, the variable 'vmcore_mmap_ops' is defined but not used: >> fs/proc/vmcore.c:458:42: warning: unused variable 'vmcore_mmap_ops' 458 | static const struct vm_operations_struct vmcore_mmap_ops = { Fix this by only defining it when CONFIG_MMU is enabled. Link: https://lkml.kernel.org/r/20241101034803.9298-1-xiqi2@huawei.com Fixes: 9cb218131de1 ("vmcore: introduce remap_oldmem_pfn_range()") Signed-off-by: Qi Xi Reported-by: kernel test robot Closes: https://lore.kernel.org/lkml/202410301936.GcE8yUos-lkp@intel.com/ Cc: Baoquan He Cc: Dave Young Cc: Michael Holzheu Cc: Vivek Goyal Cc: Wang ShaoBo Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/proc/vmcore.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index f4d5db359718..147cc771d5a6 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -457,10 +457,6 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf) #endif } -static const struct vm_operations_struct vmcore_mmap_ops = { - .fault = mmap_vmcore_fault, -}; - /** * vmcore_alloc_buf - allocate buffer in vmalloc memory * @size: size of buffer @@ -488,6 +484,11 @@ static inline char *vmcore_alloc_buf(size_t size) * virtually contiguous user-space in ELF layout. */ #ifdef CONFIG_MMU + +static const struct vm_operations_struct vmcore_mmap_ops = { + .fault = mmap_vmcore_fault, +}; + /* * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages * reported as not being ram with the zero page. From 012f4d5d25e9ef92ee129bd5aa7aa60f692681e1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 4 Nov 2024 19:54:19 +0000 Subject: [PATCH 119/174] signal: restore the override_rlimit logic commit 9e05e5c7ee8758141d2db7e8fea2cab34500c6ed upstream. Prior to commit d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") UCOUNT_RLIMIT_SIGPENDING rlimit was not enforced for a class of signals. However now it's enforced unconditionally, even if override_rlimit is set. This behavior change caused production issues. For example, if the limit is reached and a process receives a SIGSEGV signal, sigqueue_alloc fails to allocate the necessary resources for the signal delivery, preventing the signal from being delivered with siginfo. This prevents the process from correctly identifying the fault address and handling the error. From the user-space perspective, applications are unaware that the limit has been reached and that the siginfo is effectively 'corrupted'. This can lead to unpredictable behavior and crashes, as we observed with java applications. Fix this by passing override_rlimit into inc_rlimit_get_ucounts() and skip the comparison to max there if override_rlimit is set. This effectively restores the old behavior. Link: https://lkml.kernel.org/r/20241104195419.3962584-1-roman.gushchin@linux.dev Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") Signed-off-by: Roman Gushchin Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Acked-by: Oleg Nesterov Acked-by: Alexey Gladkov Cc: Kees Cook Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/user_namespace.h | 3 ++- kernel/signal.c | 3 ++- kernel/ucount.c | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..733f2a97589b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -131,7 +131,8 @@ static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type ty long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, + bool override_rlimit); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); diff --git a/kernel/signal.c b/kernel/signal.c index 4bebd2443cc3..723c84d162dd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -426,7 +426,8 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, */ rcu_read_lock(); ucounts = task_ucounts(t); - sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); + sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, + override_rlimit); rcu_read_unlock(); if (!sigpending) return NULL; diff --git a/kernel/ucount.c b/kernel/ucount.c index ee8e57fd6f90..88e7c382399c 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -307,7 +307,8 @@ void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type) do_dec_rlimit_put_ucounts(ucounts, NULL, type); } -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, + bool override_rlimit) { /* Caller must hold a reference to ucounts */ struct ucounts *iter; @@ -320,7 +321,8 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) goto unwind; if (iter == ucounts) ret = new; - max = get_userns_rlimit_max(iter->ns, type); + if (!override_rlimit) + max = get_userns_rlimit_max(iter->ns, type); /* * Grab an extra ucount reference for the caller when * the rlimit count was previously 0. From ccd811c304d2ee56189bfbc49302cb3c44361893 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 29 Oct 2024 23:13:38 +0800 Subject: [PATCH 120/174] usb: musb: sunxi: Fix accessing an released usb phy commit 498dbd9aea205db9da674994b74c7bf8e18448bd upstream. Commit 6ed05c68cbca ("usb: musb: sunxi: Explicitly release USB PHY on exit") will cause that usb phy @glue->xceiv is accessed after released. 1) register platform driver @sunxi_musb_driver // get the usb phy @glue->xceiv sunxi_musb_probe() -> devm_usb_get_phy(). 2) register and unregister platform driver @musb_driver musb_probe() -> sunxi_musb_init() use the phy here //the phy is released here musb_remove() -> sunxi_musb_exit() -> devm_usb_put_phy() 3) register @musb_driver again musb_probe() -> sunxi_musb_init() use the phy here but the phy has been released at 2). ... Fixed by reverting the commit, namely, removing devm_usb_put_phy() from sunxi_musb_exit(). Fixes: 6ed05c68cbca ("usb: musb: sunxi: Explicitly release USB PHY on exit") Cc: stable@vger.kernel.org Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20241029-sunxi_fix-v1-1-9431ed2ab826@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index 7f9a999cd5ff..4ffe0d4f2ac5 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -286,8 +286,6 @@ static int sunxi_musb_exit(struct musb *musb) if (test_bit(SUNXI_MUSB_FL_HAS_SRAM, &glue->flags)) sunxi_sram_release(musb->controller->parent); - devm_usb_put_phy(glue->dev, glue->xceiv); - return 0; } From 562804b1561cc248cc37746a1c96c83cab1d7209 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 4 Nov 2024 16:00:11 +0200 Subject: [PATCH 121/174] usb: dwc3: fix fault at system suspend if device was already runtime suspended commit 9cfb31e4c89d200d8ab7cb1e0bb9e6e8d621ca0b upstream. If the device was already runtime suspended then during system suspend we cannot access the device registers else it will crash. Also we cannot access any registers after dwc3_core_exit() on some platforms so move the dwc3_enable_susphy() call to the top. Cc: stable@vger.kernel.org # v5.15+ Reported-by: William McVicker Closes: https://lore.kernel.org/all/ZyVfcUuPq56R2m1Y@google.com Fixes: 705e3ce37bcc ("usb: dwc3: core: Fix system suspend on TI AM62 platforms") Signed-off-by: Roger Quadros Acked-by: Thinh Nguyen Tested-by: Will McVicker Link: https://lore.kernel.org/r/20241104-am62-lpm-usb-fix-v1-1-e93df73a4f0d@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 22edd8d451da..297e6032bdd1 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2131,10 +2131,18 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { u32 reg; - dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & - DWC3_GUSB2PHYCFG_SUSPHY) || - (dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)) & - DWC3_GUSB3PIPECTL_SUSPHY); + if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) { + dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & + DWC3_GUSB2PHYCFG_SUSPHY) || + (dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)) & + DWC3_GUSB3PIPECTL_SUSPHY); + /* + * TI AM62 platform requires SUSPHY to be + * enabled for system suspend to work. + */ + if (!dwc->susphy_state) + dwc3_enable_susphy(dwc, true); + } switch (dwc->current_dr_role) { case DWC3_GCTL_PRTCAP_DEVICE: @@ -2183,15 +2191,6 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; } - if (!PMSG_IS_AUTO(msg)) { - /* - * TI AM62 platform requires SUSPHY to be - * enabled for system suspend to work. - */ - if (!dwc->susphy_state) - dwc3_enable_susphy(dwc, true); - } - return 0; } From 604314ecd682913925980dc955caea2d036eab5f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Nov 2024 20:16:42 +0300 Subject: [PATCH 122/174] usb: typec: fix potential out of bounds in ucsi_ccg_update_set_new_cam_cmd() commit 7dd08a0b4193087976db6b3ee7807de7e8316f96 upstream. The "*cmd" variable can be controlled by the user via debugfs. That means "new_cam" can be as high as 255 while the size of the uc->updated[] array is UCSI_MAX_ALTMODES (30). The call tree is: ucsi_cmd() // val comes from simple_attr_write_xsigned() -> ucsi_send_command() -> ucsi_send_command_common() -> ucsi_run_command() // calls ucsi->ops->sync_control() -> ucsi_ccg_sync_control() Fixes: 170a6726d0e2 ("usb: typec: ucsi: add support for separate DP altmode devices") Cc: stable Signed-off-by: Dan Carpenter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/325102b3-eaa8-4918-a947-22aca1146586@stanley.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 835f1c4372ba..8e500fe41e78 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -441,6 +441,8 @@ static void ucsi_ccg_update_set_new_cam_cmd(struct ucsi_ccg *uc, port = uc->orig; new_cam = UCSI_SET_NEW_CAM_GET_AM(*cmd); + if (new_cam >= ARRAY_SIZE(uc->updated)) + return; new_port = &uc->updated[new_cam]; cam = new_port->linked_idx; enter_new_mode = UCSI_SET_NEW_CAM_ENTER(*cmd); From 275258c30bbda29467216e96fb655b16bcc9992b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 31 Oct 2024 12:48:30 +0300 Subject: [PATCH 123/174] USB: serial: io_edgeport: fix use after free in debug printk commit 37bb5628379295c1254c113a407cab03a0f4d0b4 upstream. The "dev_dbg(&urb->dev->dev, ..." which happens after usb_free_urb(urb) is a use after free of the "urb" pointer. Store the "dev" pointer at the start of the function to avoid this issue. Fixes: 984f68683298 ("USB: serial: io_edgeport.c: remove dbg() usage") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 3a4c0febf335..67c0c51603e5 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -770,11 +770,12 @@ static void edge_bulk_out_data_callback(struct urb *urb) static void edge_bulk_out_cmd_callback(struct urb *urb) { struct edgeport_port *edge_port = urb->context; + struct device *dev = &urb->dev->dev; int status = urb->status; atomic_dec(&CmdUrbs); - dev_dbg(&urb->dev->dev, "%s - FREE URB %p (outstanding %d)\n", - __func__, urb, atomic_read(&CmdUrbs)); + dev_dbg(dev, "%s - FREE URB %p (outstanding %d)\n", __func__, urb, + atomic_read(&CmdUrbs)); /* clean up the transfer buffer */ @@ -784,8 +785,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb) usb_free_urb(urb); if (status) { - dev_dbg(&urb->dev->dev, - "%s - nonzero write bulk status received: %d\n", + dev_dbg(dev, "%s - nonzero write bulk status received: %d\n", __func__, status); return; } From 5a4a73a6e6b3d78c51336be825faf4d8481ba96a Mon Sep 17 00:00:00 2001 From: Jack Wu Date: Wed, 6 Nov 2024 18:50:29 +0800 Subject: [PATCH 124/174] USB: serial: qcserial: add support for Sierra Wireless EM86xx commit 25eb47eed52979c2f5eee3f37e6c67714e02c49c upstream. Add support for Sierra Wireless EM86xx with USB-id 0x1199:0x90e5 and 0x1199:0x90e4. 0x1199:0x90e5 T: Bus=03 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 14 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=90e5 Rev= 5.15 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Semtech EM8695 Mobile Broadband Adapter S: SerialNumber=004403161882339 C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#=12 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=qcserial E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=qcserial E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#=12 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#=13 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#=13 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1199:0x90e4 T: Bus=03 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=90e4 Rev= 0.00 S: Manufacturer=Sierra Wireless, Incorporated S: SerialNumber=004403161882339 C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=10 Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Jack Wu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 703a9c563557..061ff754b307 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */ + {DEVICE_SWI(0x1199, 0x90e4)}, /* Sierra Wireless EM86xx QDL*/ + {DEVICE_SWI(0x1199, 0x90e5)}, /* Sierra Wireless EM86xx */ {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */ {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ From 9b298c819afe9aa1f8f4b7ceba02bbd7a0fe3a8a Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Fri, 18 Oct 2024 23:07:06 +0200 Subject: [PATCH 125/174] USB: serial: option: add Fibocom FG132 0x0112 composition commit 393c74ccbd847bacf18865a01b422586fc7341cf upstream. Add Fibocom FG132 0x0112 composition: T: Bus=03 Lev=02 Prnt=06 Port=01 Cnt=02 Dev#= 10 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0112 Rev= 5.15 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom Module S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9d2c8dc14945..24e61ee79a1b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2320,6 +2320,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0112, 0xff, 0xff, 0x30) }, /* Fibocom FG132 Diag */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0112, 0xff, 0xff, 0x40) }, /* Fibocom FG132 AT */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0112, 0xff, 0, 0) }, /* Fibocom FG132 NMEA */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ From ded5200f83a1c7eb2f51b033f8f541b42b68cb3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Thu, 24 Oct 2024 17:09:19 +0200 Subject: [PATCH 126/174] USB: serial: option: add Quectel RG650V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b05949ba39f305b585452d0e177470607842165 upstream. Add support for Quectel RG650V which is based on Qualcomm SDX65 chip. The composition is DIAG / NMEA / AT / AT / QMI. T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0122 Rev=05.15 S: Manufacturer=Quectel S: Product=RG650V-EU S: SerialNumber=xxxxxxx C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=9ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=9ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=9ms Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 24e61ee79a1b..ec737fcd2c40 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_RG650V 0x0122 #define QUECTEL_PRODUCT_EM061K_LTA 0x0123 #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 @@ -1273,6 +1274,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG912Y, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG916Q, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From 8525160ea2ab58dfd8dac7151723e0a105176901 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Nov 2024 08:44:18 +0000 Subject: [PATCH 127/174] irqchip/gic-v3: Force propagation of the active state with a read-back commit 464cb98f1c07298c4c10e714ae0c36338d18d316 upstream. Christoffer reports that on some implementations, writing to GICR_ISACTIVER0 (and similar GICD registers) can race badly with a guest issuing a deactivation of that interrupt via the system register interface. There are multiple reasons to this: - this uses an early write-acknoledgement memory type (nGnRE), meaning that the write may only have made it as far as some interconnect by the time the store is considered "done" - the GIC itself is allowed to buffer the write until it decides to take it into account (as long as it is in finite time) The effects are that the activation may not have taken effect by the time the kernel enters the guest, forcing an immediate exit, or that a guest deactivation occurs before the interrupt is active, doing nothing. In order to guarantee that the write to the ISACTIVER register has taken effect, read back from it, forcing the interconnect to propagate the write, and the GIC to process the write before returning the read. Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Acked-by: Christoffer Dall Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241106084418.3794612-1-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 11f7c53e4b63..96d04b5c9d76 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -473,6 +473,13 @@ static int gic_irq_set_irqchip_state(struct irq_data *d, } gic_poke_irq(d, reg); + + /* + * Force read-back to guarantee that the active state has taken + * effect, and won't race with a guest-driven deactivation. + */ + if (reg == GICD_ISACTIVER) + gic_peek_irq(d, reg); return 0; } From 86dd0e8d42828923c68ad506933336bcd6f2317d Mon Sep 17 00:00:00 2001 From: Andrew Kanner Date: Sun, 3 Nov 2024 20:38:45 +0100 Subject: [PATCH 128/174] ocfs2: remove entry once instead of null-ptr-dereference in ocfs2_xa_remove() commit 0b63c0e01fba40e3992bc627272ec7b618ccaef7 upstream. Syzkaller is able to provoke null-ptr-dereference in ocfs2_xa_remove(): [ 57.319872] (a.out,1161,7):ocfs2_xa_remove:2028 ERROR: status = -12 [ 57.320420] (a.out,1161,7):ocfs2_xa_cleanup_value_truncate:1999 ERROR: Partial truncate while removing xattr overlay.upper. Leaking 1 clusters and removing the entry [ 57.321727] BUG: kernel NULL pointer dereference, address: 0000000000000004 [...] [ 57.325727] RIP: 0010:ocfs2_xa_block_wipe_namevalue+0x2a/0xc0 [...] [ 57.331328] Call Trace: [ 57.331477] [...] [ 57.333511] ? do_user_addr_fault+0x3e5/0x740 [ 57.333778] ? exc_page_fault+0x70/0x170 [ 57.334016] ? asm_exc_page_fault+0x2b/0x30 [ 57.334263] ? __pfx_ocfs2_xa_block_wipe_namevalue+0x10/0x10 [ 57.334596] ? ocfs2_xa_block_wipe_namevalue+0x2a/0xc0 [ 57.334913] ocfs2_xa_remove_entry+0x23/0xc0 [ 57.335164] ocfs2_xa_set+0x704/0xcf0 [ 57.335381] ? _raw_spin_unlock+0x1a/0x40 [ 57.335620] ? ocfs2_inode_cache_unlock+0x16/0x20 [ 57.335915] ? trace_preempt_on+0x1e/0x70 [ 57.336153] ? start_this_handle+0x16c/0x500 [ 57.336410] ? preempt_count_sub+0x50/0x80 [ 57.336656] ? _raw_read_unlock+0x20/0x40 [ 57.336906] ? start_this_handle+0x16c/0x500 [ 57.337162] ocfs2_xattr_block_set+0xa6/0x1e0 [ 57.337424] __ocfs2_xattr_set_handle+0x1fd/0x5d0 [ 57.337706] ? ocfs2_start_trans+0x13d/0x290 [ 57.337971] ocfs2_xattr_set+0xb13/0xfb0 [ 57.338207] ? dput+0x46/0x1c0 [ 57.338393] ocfs2_xattr_trusted_set+0x28/0x30 [ 57.338665] ? ocfs2_xattr_trusted_set+0x28/0x30 [ 57.338948] __vfs_removexattr+0x92/0xc0 [ 57.339182] __vfs_removexattr_locked+0xd5/0x190 [ 57.339456] ? preempt_count_sub+0x50/0x80 [ 57.339705] vfs_removexattr+0x5f/0x100 [...] Reproducer uses faultinject facility to fail ocfs2_xa_remove() -> ocfs2_xa_value_truncate() with -ENOMEM. In this case the comment mentions that we can return 0 if ocfs2_xa_cleanup_value_truncate() is going to wipe the entry anyway. But the following 'rc' check is wrong and execution flow do 'ocfs2_xa_remove_entry(loc);' twice: * 1st: in ocfs2_xa_cleanup_value_truncate(); * 2nd: returning back to ocfs2_xa_remove() instead of going to 'out'. Fix this by skipping the 2nd removal of the same entry and making syzkaller repro happy. Link: https://lkml.kernel.org/r/20241103193845.2940988-1-andrew.kanner@gmail.com Fixes: 399ff3a748cf ("ocfs2: Handle errors while setting external xattr values.") Signed-off-by: Andrew Kanner Reported-by: syzbot+386ce9e60fa1b18aac5b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/671e13ab.050a0220.2b8c0f.01d0.GAE@google.com/T/ Tested-by: syzbot+386ce9e60fa1b18aac5b@syzkaller.appspotmail.com Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/xattr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index eec2ead5df6d..69fab4e5bc2a 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2040,8 +2040,7 @@ static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc, rc = 0; ocfs2_xa_cleanup_value_truncate(loc, "removing", orig_clusters); - if (rc) - goto out; + goto out; } } From 7bce2c7ac814be0c77b8571dd0a4cb656c69d9bb Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 1 Nov 2024 19:19:40 +0000 Subject: [PATCH 129/174] ucounts: fix counter leak in inc_rlimit_get_ucounts() commit 432dc0654c612457285a5dcf9bb13968ac6f0804 upstream. The inc_rlimit_get_ucounts() increments the specified rlimit counter and then checks its limit. If the value exceeds the limit, the function returns an error without decrementing the counter. Link: https://lkml.kernel.org/r/20241101191940.3211128-1-roman.gushchin@linux.dev Fixes: 15bc01effefe ("ucounts: Fix signal ucount refcounting") Signed-off-by: Andrei Vagin Co-developed-by: Roman Gushchin Signed-off-by: Roman Gushchin Tested-by: Roman Gushchin Acked-by: Alexey Gladkov Cc: Kees Cook Cc: Andrei Vagin Cc: "Eric W. Biederman" Cc: Alexey Gladkov Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/ucount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 88e7c382399c..ced99a4bb562 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -318,7 +318,7 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, for (iter = ucounts; iter; iter = iter->ns->ucounts) { long new = atomic_long_add_return(1, &iter->rlimit[type]); if (new < 0 || new > max) - goto unwind; + goto dec_unwind; if (iter == ucounts) ret = new; if (!override_rlimit) @@ -336,7 +336,6 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, dec_unwind: dec = atomic_long_sub_return(1, &iter->rlimit[type]); WARN_ON_ONCE(dec < 0); -unwind: do_dec_rlimit_put_ucounts(ucounts, iter, type); return 0; } From 7f6c3c7f8d4d81eae58c7516d69dc47866726181 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Wed, 6 Nov 2024 10:40:50 +0800 Subject: [PATCH 130/174] ASoC: amd: yc: fix internal mic on Xiaomi Book Pro 14 2022 commit de156f3cf70e17dc6ff4c3c364bb97a6db961ffd upstream. Xiaomi Book Pro 14 2022 (MIA2210-AD) requires a quirk entry for its internal microphone to be enabled. This is likely due to similar reasons as seen previously on Redmi Book 14/15 Pro 2022 models (since they likely came with similar firmware): - commit dcff8b7ca92d ("ASoC: amd: yc: Add Xiaomi Redmi Book Pro 15 2022 into DMI table") - commit c1dd6bf61997 ("ASoC: amd: yc: Add Xiaomi Redmi Book Pro 14 2022 into DMI table") A quirk would likely be needed for Xiaomi Book Pro 15 2022 models, too. However, I do not have such device on hand so I will leave it for now. Signed-off-by: Mingcong Bai Link: https://patch.msgid.link/20241106024052.15748-1-jeffbai@aosc.io Signed-off-by: Mark Brown Signed-off-by: WangYuli Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index de655f687dd7..c18549759eab 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -339,6 +339,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 15 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "TIMI"), + DMI_MATCH(DMI_PRODUCT_NAME, "Xiaomi Book Pro 14 2022"), + } + }, { .driver_data = &acp6x_card, .matches = { From b911fa9e92ee586e36479ad57b88f20471acaca1 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 18 Oct 2024 08:13:39 +0300 Subject: [PATCH 131/174] net: sched: use RCU read-side critical section in taprio_dump() commit b22db8b8befe90b61c98626ca1a2fbb0505e9fe3 upstream. Fix possible use-after-free in 'taprio_dump()' by adding RCU read-side critical section there. Never seen on x86 but found on a KASAN-enabled arm64 system when investigating https://syzkaller.appspot.com/bug?extid=b65e0af58423fc8a73aa: [T15862] BUG: KASAN: slab-use-after-free in taprio_dump+0xa0c/0xbb0 [T15862] Read of size 4 at addr ffff0000d4bb88f8 by task repro/15862 [T15862] [T15862] CPU: 0 UID: 0 PID: 15862 Comm: repro Not tainted 6.11.0-rc1-00293-gdefaf1a2113a-dirty #2 [T15862] Hardware name: QEMU QEMU Virtual Machine, BIOS edk2-20240524-5.fc40 05/24/2024 [T15862] Call trace: [T15862] dump_backtrace+0x20c/0x220 [T15862] show_stack+0x2c/0x40 [T15862] dump_stack_lvl+0xf8/0x174 [T15862] print_report+0x170/0x4d8 [T15862] kasan_report+0xb8/0x1d4 [T15862] __asan_report_load4_noabort+0x20/0x2c [T15862] taprio_dump+0xa0c/0xbb0 [T15862] tc_fill_qdisc+0x540/0x1020 [T15862] qdisc_notify.isra.0+0x330/0x3a0 [T15862] tc_modify_qdisc+0x7b8/0x1838 [T15862] rtnetlink_rcv_msg+0x3c8/0xc20 [T15862] netlink_rcv_skb+0x1f8/0x3d4 [T15862] rtnetlink_rcv+0x28/0x40 [T15862] netlink_unicast+0x51c/0x790 [T15862] netlink_sendmsg+0x79c/0xc20 [T15862] __sock_sendmsg+0xe0/0x1a0 [T15862] ____sys_sendmsg+0x6c0/0x840 [T15862] ___sys_sendmsg+0x1ac/0x1f0 [T15862] __sys_sendmsg+0x110/0x1d0 [T15862] __arm64_sys_sendmsg+0x74/0xb0 [T15862] invoke_syscall+0x88/0x2e0 [T15862] el0_svc_common.constprop.0+0xe4/0x2a0 [T15862] do_el0_svc+0x44/0x60 [T15862] el0_svc+0x50/0x184 [T15862] el0t_64_sync_handler+0x120/0x12c [T15862] el0t_64_sync+0x190/0x194 [T15862] [T15862] Allocated by task 15857: [T15862] kasan_save_stack+0x3c/0x70 [T15862] kasan_save_track+0x20/0x3c [T15862] kasan_save_alloc_info+0x40/0x60 [T15862] __kasan_kmalloc+0xd4/0xe0 [T15862] __kmalloc_cache_noprof+0x194/0x334 [T15862] taprio_change+0x45c/0x2fe0 [T15862] tc_modify_qdisc+0x6a8/0x1838 [T15862] rtnetlink_rcv_msg+0x3c8/0xc20 [T15862] netlink_rcv_skb+0x1f8/0x3d4 [T15862] rtnetlink_rcv+0x28/0x40 [T15862] netlink_unicast+0x51c/0x790 [T15862] netlink_sendmsg+0x79c/0xc20 [T15862] __sock_sendmsg+0xe0/0x1a0 [T15862] ____sys_sendmsg+0x6c0/0x840 [T15862] ___sys_sendmsg+0x1ac/0x1f0 [T15862] __sys_sendmsg+0x110/0x1d0 [T15862] __arm64_sys_sendmsg+0x74/0xb0 [T15862] invoke_syscall+0x88/0x2e0 [T15862] el0_svc_common.constprop.0+0xe4/0x2a0 [T15862] do_el0_svc+0x44/0x60 [T15862] el0_svc+0x50/0x184 [T15862] el0t_64_sync_handler+0x120/0x12c [T15862] el0t_64_sync+0x190/0x194 [T15862] [T15862] Freed by task 6192: [T15862] kasan_save_stack+0x3c/0x70 [T15862] kasan_save_track+0x20/0x3c [T15862] kasan_save_free_info+0x4c/0x80 [T15862] poison_slab_object+0x110/0x160 [T15862] __kasan_slab_free+0x3c/0x74 [T15862] kfree+0x134/0x3c0 [T15862] taprio_free_sched_cb+0x18c/0x220 [T15862] rcu_core+0x920/0x1b7c [T15862] rcu_core_si+0x10/0x1c [T15862] handle_softirqs+0x2e8/0xd64 [T15862] __do_softirq+0x14/0x20 Fixes: 18cdd2f0998a ("net/sched: taprio: taprio_dump and taprio_change are protected by rtnl_mutex") Acked-by: Vinicius Costa Gomes Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20241018051339.418890-2-dmantipov@yandex.ru Signed-off-by: Paolo Abeni [Lee: Backported from linux-6.6.y to linux-6.1.y and fixed conflicts] Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_taprio.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 212fef2b72f5..1d5cdc987abd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1995,9 +1995,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest, *sched_nest; unsigned int i; - oper = rtnl_dereference(q->oper_sched); - admin = rtnl_dereference(q->admin_sched); - opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); @@ -2024,18 +2021,23 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; + rcu_read_lock(); + + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); + if (taprio_dump_tc_entries(q, skb)) - goto options_error; + goto options_error_rcu; if (oper && dump_schedule(skb, oper)) - goto options_error; + goto options_error_rcu; if (!admin) goto done; sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); if (!sched_nest) - goto options_error; + goto options_error_rcu; if (dump_schedule(skb, admin)) goto admin_error; @@ -2043,11 +2045,15 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_nest_end(skb, sched_nest); done: + rcu_read_unlock(); return nla_nest_end(skb, nest); admin_error: nla_nest_cancel(skb, sched_nest); +options_error_rcu: + rcu_read_unlock(); + options_error: nla_nest_cancel(skb, nest); From 98d8dde9232250a57ad5ef16479bf6a349e09b80 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 6 Nov 2024 04:36:04 -0500 Subject: [PATCH 132/174] hv_sock: Initializing vsk->trans to NULL to prevent a dangling pointer commit e629295bd60abf4da1db85b82819ca6a4f6c1e79 upstream. When hvs is released, there is a possibility that vsk->trans may not be initialized to NULL, which could lead to a dangling pointer. This issue is resolved by initializing vsk->trans to NULL. Signed-off-by: Hyunwoo Kim Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/Zys4hCj61V+mQfX2@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/hyperv_transport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 59c3e2697069..53566c77814b 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -549,6 +549,7 @@ static void hvs_destruct(struct vsock_sock *vsk) vmbus_hvsock_device_unregister(chan); kfree(hvs); + vsk->trans = NULL; } static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr) From 44d29897eafd0e1196453d3003a4d5e0b968eeab Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Tue, 22 Oct 2024 09:32:56 +0200 Subject: [PATCH 133/174] vsock/virtio: Initialization of the dangling pointer occurring in vsk->trans commit 6ca575374dd9a507cdd16dfa0e78c2e9e20bd05f upstream. During loopback communication, a dangling pointer can be created in vsk->trans, potentially leading to a Use-After-Free condition. This issue is resolved by initializing vsk->trans to NULL. Cc: stable Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Hyunwoo Kim Signed-off-by: Wongi Lee Signed-off-by: Greg Kroah-Hartman Message-Id: <2024102245-strive-crib-c8d3@gregkh> Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 9983b833b55d..b22dc7bed218 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -827,6 +827,7 @@ void virtio_transport_destruct(struct vsock_sock *vsk) struct virtio_vsock_sock *vvs = vsk->trans; kfree(vvs); + vsk->trans = NULL; } EXPORT_SYMBOL_GPL(virtio_transport_destruct); From 178379985ee811c91a2fc9fd453f3b03b04150f8 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 7 Nov 2023 08:15:21 +0100 Subject: [PATCH 134/174] media: amphion: Fix VPU core alias name commit f033c87fda47e272bb4f75dc7b03677261d91158 upstream. Starting with commit f6038de293f2 ("arm64: dts: imx8qm: Fix VPU core alias name") the alias for VPU cores uses dashes instead of underscores. Adjust the alias stem accordingly. Fixes the errors: amphion-vpu-core 2d040000.vpu-core: can't get vpu core id amphion-vpu-core 2d050000.vpu-core: can't get vpu core id Fixes: f6038de293f2 ("arm64: dts: imx8qm: Fix VPU core alias name") Signed-off-by: Alexander Stein Reviewed-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/amphion/vpu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/amphion/vpu_core.c b/drivers/media/platform/amphion/vpu_core.c index 9add73b9b45f..fabbf442c66a 100644 --- a/drivers/media/platform/amphion/vpu_core.c +++ b/drivers/media/platform/amphion/vpu_core.c @@ -642,7 +642,7 @@ static int vpu_core_probe(struct platform_device *pdev) return -ENODEV; core->type = core->res->type; - core->id = of_alias_get_id(dev->of_node, "vpu_core"); + core->id = of_alias_get_id(dev->of_node, "vpu-core"); if (core->id < 0) { dev_err(dev, "can't get vpu core id\n"); return core->id; From 59d7b1a7104a2011038fe7b9922de9f10e688333 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Nov 2024 13:15:20 +0100 Subject: [PATCH 135/174] Linux 6.1.117 Link: https://lore.kernel.org/r/20241112101844.263449965@linuxfoundation.org Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Salvatore Bonaccorso Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Mark Brown Tested-by: Peter Schneider Tested-by: Jon Hunter Tested-by: Hardik Garg Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8f1f0f4a96b4..c88229bb9626 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 116 +SUBLEVEL = 117 EXTRAVERSION = NAME = Curry Ramen From 21b39fa2051084611221c6aa80ba0615c1200c61 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:42:18 +0100 Subject: [PATCH 136/174] Revert "Bluetooth: fix use-after-free in accessing skb after sending it" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 715264ad09fd4004e347cdb79fa58a4f2344f13f which is commit 947ec0d002dce8577b655793dcc6fc78d67b7cb6 upstream. It is reported to cause regressions in the 6.1.y tree, so revert it for now. Link: https://lore.kernel.org/all/CADRbXaDqx6S+7tzdDPPEpRu9eDLrHQkqoWTTGfKJSRxY=hT5MQ@mail.gmail.com/ Reported-by: Jeremy Lainé Cc: Salvatore Bonaccorso Cc: Mike Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Paul Menzel Cc: Pauli Virtanen Cc: Luiz Augusto von Dentz Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 993b98257bc2..796d9258787c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4146,7 +4146,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) if (hci_req_status_pend(hdev) && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); + hdev->req_skb = skb_clone(skb, GFP_KERNEL); } atomic_dec(&hdev->cmd_cnt); From 4c69abb4d41ece30d9f4cfdf51cf3ee838f48723 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:44:07 +0100 Subject: [PATCH 137/174] Revert "Bluetooth: hci_core: Fix possible buffer overflow" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 68644bf5ec6baaff40fc39b3529c874bfda709bd which is commit 81137162bfaa7278785b24c1fd2e9e74f082e8e4 upstream. It is reported to cause regressions in the 6.1.y tree, so revert it for now. Link: https://lore.kernel.org/all/CADRbXaDqx6S+7tzdDPPEpRu9eDLrHQkqoWTTGfKJSRxY=hT5MQ@mail.gmail.com/ Reported-by: Jeremy Lainé Cc: Salvatore Bonaccorso Cc: Mike Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Paul Menzel Cc: Pauli Virtanen Cc: Luiz Augusto von Dentz Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index baa1fb498cf1..789f7f4a0908 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -869,7 +869,7 @@ int hci_get_dev_info(void __user *arg) else flags = hdev->flags; - strscpy(di.name, hdev->name, sizeof(di.name)); + strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; From 0337fb092873a4146aa854acb554675b49c8f6b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:42:48 +0100 Subject: [PATCH 138/174] Revert "Bluetooth: hci_sync: Fix overwriting request callback" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit da77c1d39bc527b31890bfa0405763c82828defb which is commit 2615fd9a7c2507eb3be3fbe49dcec88a2f56454a upstream. It is reported to cause regressions in the 6.1.y tree, so revert it for now. Link: https://lore.kernel.org/all/CADRbXaDqx6S+7tzdDPPEpRu9eDLrHQkqoWTTGfKJSRxY=hT5MQ@mail.gmail.com/ Reported-by: Jeremy Lainé Cc: Salvatore Bonaccorso Cc: Mike Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Paul Menzel Cc: Pauli Virtanen Cc: Luiz Augusto von Dentz Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 46 ++++++++++---------------------- net/bluetooth/hci_event.c | 18 ++++++------- net/bluetooth/hci_sync.c | 21 +++------------ 5 files changed, 27 insertions(+), 61 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 215b56dc26df..ad1efeb135d6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -544,7 +544,6 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; - struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5ec2160108a1..ba708974fa51 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2816,7 +2816,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, ECANCELED); + hci_cmd_sync_cancel(hdev, -ECANCELED); break; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 796d9258787c..baa1fb498cf1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1452,8 +1452,8 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); - if (hdev->req_skb) { - u16 opcode = hci_skb_opcode(hdev->req_skb); + if (hdev->sent_cmd) { + u16 opcode = hci_skb_opcode(hdev->sent_cmd); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); @@ -2762,7 +2762,6 @@ void hci_release_dev(struct hci_dev *hdev) ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); - kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } @@ -3092,33 +3091,21 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ -static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { struct hci_command_hdr *hdr; - if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) + if (!hdev->sent_cmd) return NULL; - hdr = (void *)skb->data; + hdr = (void *) hdev->sent_cmd->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - return skb->data + HCI_COMMAND_HDR_SIZE; -} + BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); -/* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) -{ - void *data; - - /* Check if opcode matches last sent command */ - data = hci_cmd_data(hdev->sent_cmd, opcode); - if (!data) - /* Check if opcode matches last request */ - data = hci_cmd_data(hdev->req_skb, opcode); - - return data; + return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; } /* Get data from last received event */ @@ -4014,19 +4001,17 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, if (!status && !hci_req_is_complete(hdev)) return; - skb = hdev->req_skb; - /* If this was the last command in a request the complete - * callback would be found in hdev->req_skb instead of the + * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { - *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } - if (skb && bt_cb(skb)->hci.req_complete) { - *req_complete = bt_cb(skb)->hci.req_complete; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } @@ -4143,11 +4128,8 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (hci_req_status_pend(hdev) && - !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { - kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(skb, GFP_KERNEL); - } + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); atomic_dec(&hdev->cmd_cnt); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0c0141c59fd1..ac9bdfac773f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4354,7 +4354,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of commands there will not be a command * complete event). */ - if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) { + if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { @@ -7171,10 +7171,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); /* Only match event if command OGF is for LE */ - if (hdev->req_skb && - hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && - hci_skb_event(hdev->req_skb) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->req_skb); + if (hdev->sent_cmd && + hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && + hci_skb_event(hdev->sent_cmd) == ev->subevent) { + *opcode = hci_skb_opcode(hdev->sent_cmd); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, req_complete_skb); } @@ -7561,10 +7561,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) } /* Only match event if command OGF is not for LE */ - if (hdev->req_skb && - hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 && - hci_skb_event(hdev->req_skb) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb), + if (hdev->sent_cmd && + hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && + hci_skb_event(hdev->sent_cmd) == event) { + hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), status, &req_complete, &req_complete_skb); req_evt = event; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c368235202b2..5a6d70d6424f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -31,10 +31,6 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; - /* Free the request command so it is not used as response */ - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - if (skb) { struct sock *sk = hci_skb_sk(skb); @@ -42,7 +38,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (sk) sock_put(sk); - hdev->req_rsp = skb_get(skb); + hdev->req_skb = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); @@ -190,8 +186,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = 0; hdev->req_result = 0; - skb = hdev->req_rsp; - hdev->req_rsp = NULL; + skb = hdev->req_skb; + hdev->req_skb = NULL; bt_dev_dbg(hdev, "end: err %d", err); @@ -4941,11 +4937,6 @@ int hci_dev_open_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } - if (hdev->req_skb) { - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - } - clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5107,12 +5098,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } - /* Drop last request */ - if (hdev->req_skb) { - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - } - clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); From 903227b6168bb99fd57d4e3c9c1b5014986198e0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:45:02 +0100 Subject: [PATCH 139/174] Revert "Bluetooth: hci_conn: Consolidate code for aborting connections" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6083089ab00631617f9eac678df3ab050a9d837a which is commit a13f316e90fdb1fb6df6582e845aa9b3270f3581 upstream. It is reported to cause regressions in the 6.1.y tree, so revert it for now. Link: https://lore.kernel.org/all/CADRbXaDqx6S+7tzdDPPEpRu9eDLrHQkqoWTTGfKJSRxY=hT5MQ@mail.gmail.com/ Reported-by: Jeremy Lainé Cc: Salvatore Bonaccorso Cc: Mike Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Paul Menzel Cc: Pauli Virtanen Cc: Luiz Augusto von Dentz Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 158 ++++++++++++++++++++++++------- net/bluetooth/hci_sync.c | 23 ++--- net/bluetooth/mgmt.c | 15 ++- 4 files changed, 149 insertions(+), 49 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ad1efeb135d6..d26b57e87f7f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -734,7 +734,6 @@ struct hci_conn { unsigned long flags; enum conn_reasons conn_reason; - __u8 abort_reason; __u32 clock; __u16 clock_accuracy; @@ -754,6 +753,7 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; + struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ba708974fa51..49b9dd21b73e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -174,6 +174,57 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } +static void le_scan_cleanup(struct work_struct *work) +{ + struct hci_conn *conn = container_of(work, struct hci_conn, + le_scan_cleanup); + struct hci_dev *hdev = conn->hdev; + struct hci_conn *c = NULL; + + BT_DBG("%s hcon %p", hdev->name, conn); + + hci_dev_lock(hdev); + + /* Check that the hci_conn is still around */ + rcu_read_lock(); + list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { + if (c == conn) + break; + } + rcu_read_unlock(); + + if (c == conn) { + hci_connect_le_scan_cleanup(conn, 0x00); + hci_conn_cleanup(conn); + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + hci_conn_put(conn); +} + +static void hci_connect_le_scan_remove(struct hci_conn *conn) +{ + BT_DBG("%s hcon %p", conn->hdev->name, conn); + + /* We can't call hci_conn_del/hci_conn_cleanup here since that + * could deadlock with another hci_conn_del() call that's holding + * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). + * Instead, grab temporary extra references to the hci_dev and + * hci_conn and perform the necessary cleanup in a separate work + * callback. + */ + + hci_dev_hold(conn->hdev); + hci_conn_get(conn); + + /* Even though we hold a reference to the hdev, many other + * things might get cleaned up meanwhile, including the hdev's + * own workqueue, so we can't use that for scheduling. + */ + schedule_work(&conn->le_scan_cleanup); +} + static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -625,6 +676,13 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; + /* LE connections in scanning state need special handling */ + if (conn->state == BT_CONNECT && conn->type == LE_LINK && + test_bit(HCI_CONN_SCANNING, &conn->flags)) { + hci_connect_le_scan_remove(conn); + return; + } + hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } @@ -996,6 +1054,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); + INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -2781,46 +2840,81 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } -static int abort_conn_sync(struct hci_dev *hdev, void *data) -{ - struct hci_conn *conn; - u16 handle = PTR_ERR(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - - return hci_abort_conn_sync(hdev, conn, conn->abort_reason); -} - int hci_abort_conn(struct hci_conn *conn, u8 reason) { - struct hci_dev *hdev = conn->hdev; + int r = 0; - /* If abort_reason has already been set it means the connection is - * already being aborted so don't attempt to overwrite it. - */ - if (conn->abort_reason) + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; - bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason); + switch (conn->state) { + case BT_CONNECTED: + case BT_CONFIG: + if (conn->type == AMP_LINK) { + struct hci_cp_disconn_phy_link cp; - conn->abort_reason = reason; + cp.phy_handle = HCI_PHY_HANDLE(conn->handle); + cp.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, + sizeof(cp), &cp); + } else { + struct hci_cp_disconnect dc; - /* If the connection is pending check the command opcode since that - * might be blocking on hci_cmd_sync_work while waiting its respective - * event so we need to hci_cmd_sync_cancel to cancel it. - */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { - switch (hci_skb_event(hdev->sent_cmd)) { - case HCI_EV_LE_CONN_COMPLETE: - case HCI_EV_LE_ENHANCED_CONN_COMPLETE: - case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, -ECANCELED); - break; + dc.handle = cpu_to_le16(conn->handle); + dc.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, + sizeof(dc), &dc); } + + conn->state = BT_DISCONN; + + break; + case BT_CONNECT: + if (conn->type == LE_LINK) { + if (test_bit(HCI_CONN_SCANNING, &conn->flags)) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); + } else if (conn->type == ACL_LINK) { + if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_CREATE_CONN_CANCEL, + 6, &conn->dst); + } + break; + case BT_CONNECT2: + if (conn->type == ACL_LINK) { + struct hci_cp_reject_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + rej.reason = reason; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_CONN_REQ, + sizeof(rej), &rej); + } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + struct hci_cp_reject_sync_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + + /* SCO rejection has its own limited set of + * allowed error values (0x0D-0x0F) which isn't + * compatible with most values passed to this + * function. To be safe hard-code one of the + * values that's suitable for SCO. + */ + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_SYNC_CONN_REQ, + sizeof(rej), &rej); + } + break; + default: + conn->state = BT_CLOSED; + break; } - return hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), - NULL); + return r; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5a6d70d6424f..862ac5e1f4b4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5293,27 +5293,22 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, - struct hci_conn *conn, u8 reason) + struct hci_conn *conn) { - /* Return reason if scanning since the connection shall probably be - * cleanup directly. - */ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - return reason; + return 0; - if (conn->role == HCI_ROLE_SLAVE || - test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } -static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, - u8 reason) +static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) { if (conn->type == LE_LINK) - return hci_le_connect_cancel_sync(hdev, conn, reason); + return hci_le_connect_cancel_sync(hdev, conn); if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; @@ -5366,11 +5361,9 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - err = hci_connect_cancel_sync(hdev, conn, reason); + err = hci_connect_cancel_sync(hdev, conn); /* Cleanup hci_conn object if it cannot be cancelled as it - * likelly means the controller and host stack are out of sync - * or in case of LE it was still scanning so it can be cleanup - * safely. + * likelly means the controller and host stack are out of sync. */ if (err) { hci_dev_lock(hdev); @@ -6285,7 +6278,7 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) done: if (err == -ETIMEDOUT) - hci_le_connect_cancel_sync(hdev, conn, 0x00); + hci_le_connect_cancel_sync(hdev, conn); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 284a0672dcc3..5a1015ccc063 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3600,6 +3600,18 @@ unlock: return err; } +static int abort_conn_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn; + u16 handle = PTR_ERR(data); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + return 0; + + return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); +} + static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3650,7 +3662,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); unlock: hci_dev_unlock(hdev); From 0625d7c240b307b78640dcd823cb738cb900a8ba Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Nov 2024 15:43:55 +0100 Subject: [PATCH 140/174] Revert "Bluetooth: af_bluetooth: Fix deadlock" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cb8adca52f306563d958a863bb0cbae9c184d1ae which is commit f7b94bdc1ec107c92262716b073b3e816d4784fb upstream. It is reported to cause regressions in the 6.1.y tree, so revert it for now. Link: https://lore.kernel.org/all/CADRbXaDqx6S+7tzdDPPEpRu9eDLrHQkqoWTTGfKJSRxY=hT5MQ@mail.gmail.com/ Reported-by: Jeremy Lainé Cc: Salvatore Bonaccorso Cc: Mike Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Paul Menzel Cc: Pauli Virtanen Cc: Luiz Augusto von Dentz Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/af_bluetooth.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 4c3bd0516038..7960fd514e5a 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -307,11 +307,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; + release_sock(sk); return err; } @@ -337,6 +340,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; @@ -559,11 +564,10 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - spin_lock(&sk->sk_receive_queue.lock); + lock_sock(sk); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; - spin_unlock(&sk->sk_receive_queue.lock); - + release_sock(sk); err = put_user(amount, (int __user *)arg); break; From af39f19c71ded776120bd1134b39666c71ecb12c Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 7 Aug 2024 10:47:25 +0100 Subject: [PATCH 141/174] 9p: Avoid creating multiple slab caches with the same name [ Upstream commit 79efebae4afc2221fa814c3cae001bede66ab259 ] In the spirit of [1], avoid creating multiple slab caches with the same name. Instead, add the dev_name into the mix. [1]: https://lore.kernel.org/all/20240807090746.2146479-1-pedro.falcato@gmail.com/ Signed-off-by: Pedro Falcato Reported-by: syzbot+3c5d43e97993e1fa612b@syzkaller.appspotmail.com Message-ID: <20240807094725.2193423-1-pedro.falcato@gmail.com> Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/client.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 0fc2d706d9c2..18db0e23e2f1 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -969,6 +969,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) int err; struct p9_client *clnt; char *client_id; + char *cache_name; err = 0; clnt = kmalloc(sizeof(*clnt), GFP_KERNEL); @@ -1026,15 +1027,22 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto close_trans; + cache_name = kasprintf(GFP_KERNEL, "9p-fcall-cache-%s", dev_name); + if (!cache_name) { + err = -ENOMEM; + goto close_trans; + } + /* P9_HDRSZ + 4 is the smallest packet header we can have that is * followed by data accessed from userspace by read */ clnt->fcall_cache = - kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize, + kmem_cache_create_usercopy(cache_name, clnt->msize, 0, 0, P9_HDRSZ + 4, clnt->msize - (P9_HDRSZ + 4), NULL); + kfree(cache_name); return clnt; close_trans: From 94a4d966f7e25da74ba5617d793b36a8addebfed Mon Sep 17 00:00:00 2001 From: Sergey Matsievskiy Date: Wed, 25 Sep 2024 21:44:15 +0300 Subject: [PATCH 142/174] irqchip/ocelot: Fix trigger register address [ Upstream commit 9e9c4666abb5bb444dac37e2d7eb5250c8d52a45 ] Controllers, supported by this driver, have two sets of registers: * (main) interrupt registers control peripheral interrupt sources. * device interrupt registers configure per-device (network interface) interrupts and act as an extra stage before the main interrupt registers. In the driver unmask code, device trigger registers are used in the mask calculation of the main interrupt sticky register, mixing two kinds of registers. Use the main interrupt trigger register instead. Signed-off-by: Sergey Matsievskiy Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240925184416.54204-2-matsievskiysv@gmail.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mscc-ocelot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-mscc-ocelot.c b/drivers/irqchip/irq-mscc-ocelot.c index 4d0c3532dbe7..c19ab379e8c5 100644 --- a/drivers/irqchip/irq-mscc-ocelot.c +++ b/drivers/irqchip/irq-mscc-ocelot.c @@ -37,7 +37,7 @@ static struct chip_props ocelot_props = { .reg_off_ena_clr = 0x1c, .reg_off_ena_set = 0x20, .reg_off_ident = 0x38, - .reg_off_trigger = 0x5c, + .reg_off_trigger = 0x4, .n_irq = 24, }; @@ -70,7 +70,7 @@ static struct chip_props jaguar2_props = { .reg_off_ena_clr = 0x1c, .reg_off_ena_set = 0x20, .reg_off_ident = 0x38, - .reg_off_trigger = 0x5c, + .reg_off_trigger = 0x4, .n_irq = 29, }; From 4f946479b326a3cbb193f2b8368aed9269514c35 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 2 Oct 2024 13:51:41 +0900 Subject: [PATCH 143/174] nvme: tcp: avoid race between queue_lock lock and destroy [ Upstream commit 782373ba27660ba7d330208cf5509ece6feb4545 ] Commit 76d54bf20cdc ("nvme-tcp: don't access released socket during error recovery") added a mutex_lock() call for the queue->queue_lock in nvme_tcp_get_address(). However, the mutex_lock() races with mutex_destroy() in nvme_tcp_free_queue(), and causes the WARN below. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 3 PID: 34077 at kernel/locking/mutex.c:587 __mutex_lock+0xcf0/0x1220 Modules linked in: nvmet_tcp nvmet nvme_tcp nvme_fabrics iw_cm ib_cm ib_core pktcdvd nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables qrtr sunrpc ppdev 9pnet_virtio 9pnet pcspkr netfs parport_pc parport e1000 i2c_piix4 i2c_smbus loop fuse nfnetlink zram bochs drm_vram_helper drm_ttm_helper ttm drm_kms_helper xfs drm sym53c8xx floppy nvme scsi_transport_spi nvme_core nvme_auth serio_raw ata_generic pata_acpi dm_multipath qemu_fw_cfg [last unloaded: ib_uverbs] CPU: 3 UID: 0 PID: 34077 Comm: udisksd Not tainted 6.11.0-rc7 #319 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014 RIP: 0010:__mutex_lock+0xcf0/0x1220 Code: 08 84 d2 0f 85 c8 04 00 00 8b 15 ef b6 c8 01 85 d2 0f 85 78 f4 ff ff 48 c7 c6 20 93 ee af 48 c7 c7 60 91 ee af e8 f0 a7 6d fd <0f> 0b e9 5e f4 ff ff 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 RSP: 0018:ffff88811305f760 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88812c652058 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000001 RBP: ffff88811305f8b0 R08: 0000000000000001 R09: ffffed1075c36341 R10: ffff8883ae1b1a0b R11: 0000000000010498 R12: 0000000000000000 R13: 0000000000000000 R14: dffffc0000000000 R15: ffff88812c652058 FS: 00007f9713ae4980(0000) GS:ffff8883ae180000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fcd78483c7c CR3: 0000000122c38000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn.cold+0x5b/0x1af ? __mutex_lock+0xcf0/0x1220 ? report_bug+0x1ec/0x390 ? handle_bug+0x3c/0x80 ? exc_invalid_op+0x13/0x40 ? asm_exc_invalid_op+0x16/0x20 ? __mutex_lock+0xcf0/0x1220 ? nvme_tcp_get_address+0xc2/0x1e0 [nvme_tcp] ? __pfx___mutex_lock+0x10/0x10 ? __lock_acquire+0xd6a/0x59e0 ? nvme_tcp_get_address+0xc2/0x1e0 [nvme_tcp] nvme_tcp_get_address+0xc2/0x1e0 [nvme_tcp] ? __pfx_nvme_tcp_get_address+0x10/0x10 [nvme_tcp] nvme_sysfs_show_address+0x81/0xc0 [nvme_core] dev_attr_show+0x42/0x80 ? __asan_memset+0x1f/0x40 sysfs_kf_seq_show+0x1f0/0x370 seq_read_iter+0x2cb/0x1130 ? rw_verify_area+0x3b1/0x590 ? __mutex_lock+0x433/0x1220 vfs_read+0x6a6/0xa20 ? lockdep_hardirqs_on+0x78/0x100 ? __pfx_vfs_read+0x10/0x10 ksys_read+0xf7/0x1d0 ? __pfx_ksys_read+0x10/0x10 ? __x64_sys_openat+0x105/0x1d0 do_syscall_64+0x93/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? __pfx_ksys_read+0x10/0x10 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? do_syscall_64+0x9f/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f9713f55cfa Code: 55 48 89 e5 48 83 ec 20 48 89 55 e8 48 89 75 f0 89 7d f8 e8 e8 74 f8 ff 48 8b 55 e8 48 8b 75 f0 41 89 c0 8b 7d f8 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 2e 44 89 c7 48 89 45 f8 e8 42 75 f8 ff 48 8b RSP: 002b:00007ffd7f512e70 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000055c38f316859 RCX: 00007f9713f55cfa RDX: 0000000000000fff RSI: 00007ffd7f512eb0 RDI: 0000000000000011 RBP: 00007ffd7f512e90 R08: 0000000000000000 R09: 00000000ffffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000055c38f317148 R13: 0000000000000000 R14: 00007f96f4004f30 R15: 000055c3b6b623c0 The WARN is observed when the blktests test case nvme/014 is repeated with tcp transport. It is rare, and 200 times repeat is required to recreate in some test environments. To avoid the WARN, check the NVME_TCP_Q_LIVE flag before locking queue->queue_lock. The flag is cleared long time before the lock gets destroyed. Signed-off-by: Hannes Reinecke Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/tcp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f2fedd25915f..29489c2c52fb 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2495,10 +2495,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len = nvmf_get_address(ctrl, buf, size); + if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) + return len; + mutex_lock(&queue->queue_lock); - if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) - goto done; ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); if (ret > 0) { if (len > 0) @@ -2506,7 +2507,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", (len) ? "," : "", &src_addr); } -done: + mutex_unlock(&queue->queue_lock); return len; From 77b0a8b062b1d16848d3e20981ac5882e858caac Mon Sep 17 00:00:00 2001 From: SurajSonawane2415 Date: Mon, 7 Oct 2024 16:44:16 +0530 Subject: [PATCH 144/174] block: Fix elevator_get_default() checking for NULL q->tag_set [ Upstream commit b402328a24ee7193a8ab84277c0c90ae16768126 ] elevator_get_default() and elv_support_iosched() both check for whether or not q->tag_set is non-NULL, however it's not possible for them to be NULL. This messes up some static checkers, as the checking of tag_set isn't consistent. Remove the checks, which both simplifies the logic and avoids checker errors. Signed-off-by: SurajSonawane2415 Link: https://lore.kernel.org/r/20241007111416.13814-1-surajsonawane0215@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/elevator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index bd71f0fc4e4b..06288117e2dd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -624,7 +624,7 @@ out: static inline bool elv_support_iosched(struct request_queue *q) { if (!queue_is_mq(q) || - (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) + (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) return false; return true; } @@ -635,7 +635,7 @@ static inline bool elv_support_iosched(struct request_queue *q) */ static struct elevator_type *elevator_get_default(struct request_queue *q) { - if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) + if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return NULL; if (q->nr_hw_queues != 1 && From c2242ebafdcc015022bc77972f492ad24386259d Mon Sep 17 00:00:00 2001 From: Stefan Blum Date: Sun, 6 Oct 2024 10:12:23 +0200 Subject: [PATCH 145/174] HID: multitouch: Add support for B2402FVA track point [ Upstream commit 1a5cbb526ec4b885177d06a8bc04f38da7dbb1d9 ] By default the track point does not work on the Asus Expertbook B2402FVA. From libinput record i got the ID of the track point device: evdev: # Name: ASUE1201:00 04F3:32AE # ID: bus 0x18 vendor 0x4f3 product 0x32ae version 0x100 I found that the track point is functional, when i set the MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU class for the reported device. Signed-off-by: Stefan Blum Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e7199ae2e3d9..7584e5a3aafe 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2020,6 +2020,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x3148) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_ELAN, 0x32ae) }, + /* Elitegroup panel */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_ELITEGROUP, From eb6751a26aacbbab489da9ff4bacd416b4ddf013 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Mon, 7 Oct 2024 12:08:03 +0800 Subject: [PATCH 146/174] HID: multitouch: Add quirk for HONOR MagicBook Art 14 touchpad [ Upstream commit 7a5ab8071114344f62a8b1e64ed3452a77257d76 ] The behavior of HONOR MagicBook Art 14 touchpad is not consistent after reboots, as sometimes it reports itself as a touchpad, and sometimes as a mouse. Similarly to GLO-GXXX it is possible to call MT_QUIRK_FORCE_GET_FEATURE as a workaround to force set feature in mt_set_input_mode() for such special touchpad device. [jkosina@suse.com: reword changelog a little bit] Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/1040 Signed-off-by: Wentao Guan Signed-off-by: WangYuli Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-multitouch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7584e5a3aafe..c2d79b2d6cdd 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2093,6 +2093,11 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, 0x347d, 0x7853) }, + /* HONOR MagicBook Art 14 touchpad */ + { .driver_data = MT_CLS_VTL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + 0x35cc, 0x0104) }, + /* Ilitek dual touch panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_ILITEK, From ccf7d314810c9a3600063f4d48b09b895b3ee7a4 Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Mon, 7 Oct 2024 14:33:24 -0500 Subject: [PATCH 147/174] nvme: disable CC.CRIME (NVME_CC_CRIME) [ Upstream commit 0ce96a6708f34280a536263ee5c67e20c433dcce ] Disable NVME_CC_CRIME so that CSTS.RDY indicates that the media is ready and able to handle commands without returning NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Signed-off-by: Greg Joyce Reviewed-by: Nilay Shroff Tested-by: Nilay Shroff Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0729ab543072..dc25d9189132 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2394,8 +2394,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) else ctrl->ctrl_config = NVME_CC_CSS_NVM; - if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS) - ctrl->ctrl_config |= NVME_CC_CRIME; + /* + * Setting CRIME results in CSTS.RDY before the media is ready. This + * makes it possible for media related commands to return the error + * NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is + * restructured to handle retries, disable CC.CRIME. + */ + ctrl->ctrl_config &= ~NVME_CC_CRIME; ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; @@ -2430,10 +2435,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) * devices are known to get this wrong. Use the larger of the * two values. */ - if (ctrl->ctrl_config & NVME_CC_CRIME) - ready_timeout = NVME_CRTO_CRIMT(crto); - else - ready_timeout = NVME_CRTO_CRWMT(crto); + ready_timeout = NVME_CRTO_CRWMT(crto); if (ready_timeout < timeout) dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n", From 911c9bc04809ad814b2955151e9553e974eb4bce Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 8 Oct 2024 17:07:35 -0400 Subject: [PATCH 148/174] bpf: use kvzmalloc to allocate BPF verifier environment [ Upstream commit 434247637c66e1be2bc71a9987d4c3f0d8672387 ] The kzmalloc call in bpf_check can fail when memory is very fragmented, which in turn can lead to an OOM kill. Use kvzmalloc to fall back to vmalloc when memory is too fragmented to allocate an order 3 sized bpf verifier environment. Admittedly this is not a very common case, and only happens on systems where memory has already been squeezed close to the limit, but this does not seem like much of a hot path, and it's a simple enough fix. Signed-off-by: Rik van Riel Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20241008170735.16766766@imladris.surriel.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb54f1f4fafb..da90f565317d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15500,7 +15500,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; log = &env->log; @@ -15721,6 +15721,6 @@ err_unlock: mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); err_free_env: - kfree(env); + kvfree(env); return ret; } From 1bc59a7c0773a6bfbd4cc9760ee3633549fe6ee7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Oct 2024 09:18:37 +0800 Subject: [PATCH 149/174] crypto: api - Fix liveliness check in crypto_alg_tested [ Upstream commit b81e286ba154a4e0f01a94d99179a97f4ba3e396 ] As algorithm testing is carried out without holding the main crypto lock, it is always possible for the algorithm to go away during the test. So before crypto_alg_tested updates the status of the tested alg, it checks whether it's still on the list of all algorithms. This is inaccurate because it may be off the main list but still on the list of algorithms to be removed. Updating the algorithm status is safe per se as the larval still holds a reference to it. However, killing spawns of other algorithms that are of lower priority is clearly a deficiency as it adds unnecessary churn. Fix the test by checking whether the algorithm is dead. Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 5dc9ccdd5a51..206a13f39596 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -341,7 +341,7 @@ found: q->cra_flags |= CRYPTO_ALG_DEAD; alg = test->adult; - if (list_empty(&alg->cra_list)) + if (crypto_is_dead(alg)) goto complete; if (err == -ECANCELED) From 75a1b35dcc47c01fef4801f63a9f8946943ac9f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 9 Oct 2024 16:38:48 +0800 Subject: [PATCH 150/174] crypto: marvell/cesa - Disable hash algorithms [ Upstream commit e845d2399a00f866f287e0cefbd4fc7d8ef0d2f7 ] Disable cesa hash algorithms by lowering the priority because they appear to be broken when invoked in parallel. This allows them to still be tested for debugging purposes. Reported-by: Klaus Kudielka Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/marvell/cesa/hash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index c72b0672fc71..84c106509279 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -947,7 +947,7 @@ struct ahash_alg mv_md5_alg = { .base = { .cra_name = "md5", .cra_driver_name = "mv-md5", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1018,7 +1018,7 @@ struct ahash_alg mv_sha1_alg = { .base = { .cra_name = "sha1", .cra_driver_name = "mv-sha1", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1092,7 +1092,7 @@ struct ahash_alg mv_sha256_alg = { .base = { .cra_name = "sha256", .cra_driver_name = "mv-sha256", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1327,7 +1327,7 @@ struct ahash_alg mv_ahmac_md5_alg = { .base = { .cra_name = "hmac(md5)", .cra_driver_name = "mv-hmac-md5", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1398,7 +1398,7 @@ struct ahash_alg mv_ahmac_sha1_alg = { .base = { .cra_name = "hmac(sha1)", .cra_driver_name = "mv-hmac-sha1", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, @@ -1469,7 +1469,7 @@ struct ahash_alg mv_ahmac_sha256_alg = { .base = { .cra_name = "hmac(sha256)", .cra_driver_name = "mv-hmac-sha256", - .cra_priority = 300, + .cra_priority = 0, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY, From b8d1f4d38306226054ab78ec3943b222ecc20f3e Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Thu, 10 Oct 2024 14:46:01 +0200 Subject: [PATCH 151/174] sound: Make CONFIG_SND depend on INDIRECT_IOMEM instead of UML [ Upstream commit ad6639f143a0b42d7fb110ad14f5949f7c218890 ] When building for the UM arch and neither INDIRECT_IOMEM=y, nor HAS_IOMEM=y is selected, it will fall back to the implementations from asm-generic/io.h for IO memcpy. But these fall-back functions just do a memcpy. So, instead of depending on UML, add dependency on 'HAS_IOMEM || INDIRECT_IOMEM'. Reviewed-by: Yann Sionneau Signed-off-by: Julian Vetter Link: https://patch.msgid.link/20241010124601.700528-1-jvetter@kalrayinc.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/Kconfig b/sound/Kconfig index 1903c35d799e..5848eedcc3c9 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig SOUND tristate "Sound card support" - depends on HAS_IOMEM || UML + depends on HAS_IOMEM || INDIRECT_IOMEM help If you have a sound card in your computer, i.e. if it can say more than an occasional beep, say Y. From f17c880a4736d8207c71581ce44e518a581c355b Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Thu, 8 Aug 2024 15:06:34 -0500 Subject: [PATCH 152/174] drm/vmwgfx: Limit display layout ioctl array size to VMWGFX_NUM_DISPLAY_UNITS [ Upstream commit 28a5dfd4f615539fb22fb6d5c219c199c14e6eb6 ] Currently the array size is only limited by the largest kmalloc size which is incorrect. This change will also return a more specific error message than ENOMEM to userspace. Signed-off-by: Ian Forbes Reviewed-by: Zack Rusin Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240808200634.1074083-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 +++- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 3 --- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index bca10214e0bf..abdca2346f1a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -59,7 +59,7 @@ #define VMWGFX_DRIVER_MINOR 20 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) -#define VMWGFX_MAX_DISPLAYS 16 +#define VMWGFX_NUM_DISPLAY_UNITS 8 #define VMWGFX_CMD_BOUNCE_INIT_SIZE 32768 #define VMWGFX_MIN_INITIAL_WIDTH 1280 @@ -79,7 +79,7 @@ #define VMWGFX_NUM_GB_CONTEXT 256 #define VMWGFX_NUM_GB_SHADER 20000 #define VMWGFX_NUM_GB_SURFACE 32768 -#define VMWGFX_NUM_GB_SCREEN_TARGET VMWGFX_MAX_DISPLAYS +#define VMWGFX_NUM_GB_SCREEN_TARGET VMWGFX_NUM_DISPLAY_UNITS #define VMWGFX_NUM_DXCONTEXT 256 #define VMWGFX_NUM_DXQUERY 512 #define VMWGFX_NUM_MOB (VMWGFX_NUM_GB_CONTEXT +\ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index a8f349e748e5..5210b8084217 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2261,7 +2261,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, struct drm_mode_config *mode_config = &dev->mode_config; struct drm_vmw_update_layout_arg *arg = (struct drm_vmw_update_layout_arg *)data; - void __user *user_rects; + const void __user *user_rects; struct drm_vmw_rect *rects; struct drm_rect *drm_rects; unsigned rects_size; @@ -2273,6 +2273,8 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, VMWGFX_MIN_INITIAL_HEIGHT}; vmw_du_update_layout(dev_priv, 1, &def_rect); return 0; + } else if (arg->num_outputs > VMWGFX_NUM_DISPLAY_UNITS) { + return -E2BIG; } rects_size = arg->num_outputs * sizeof(struct drm_vmw_rect); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 1099de1ece4b..a2a294841df4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -199,9 +199,6 @@ struct vmw_kms_dirty { s32 unit_y2; }; -#define VMWGFX_NUM_DISPLAY_UNITS 8 - - #define vmw_framebuffer_to_vfb(x) \ container_of(x, struct vmw_framebuffer, base) #define vmw_framebuffer_to_vfbs(x) \ From 60de2e03f984cfbcdc12fa552f95087c35a05a98 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Oct 2024 07:30:17 -0700 Subject: [PATCH 153/174] nvme-multipath: defer partition scanning [ Upstream commit 1f021341eef41e77a633186e9be5223de2ce5d48 ] We need to suppress the partition scan from occuring within the controller's scan_work context. If a path error occurs here, the IO will wait until a path becomes available or all paths are torn down, but that action also occurs within scan_work, so it would deadlock. Defer the partion scan to a different context that does not block scan_work. Reported-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/multipath.c | 33 +++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 34 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93ada8941a4c..43b89c7d585f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -463,6 +463,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) return ret; } +static void nvme_partition_scan_work(struct work_struct *work) +{ + struct nvme_ns_head *head = + container_of(work, struct nvme_ns_head, partition_scan_work); + + if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN, + &head->disk->state))) + return; + + mutex_lock(&head->disk->open_mutex); + bdev_disk_changed(head->disk, false); + mutex_unlock(&head->disk->open_mutex); +} + static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = @@ -489,6 +503,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) bio_list_init(&head->requeue_list); spin_lock_init(&head->requeue_lock); INIT_WORK(&head->requeue_work, nvme_requeue_work); + INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work); /* * Add a multipath node if the subsystems supports multiple controllers. @@ -504,6 +519,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) return -ENOMEM; head->disk->fops = &nvme_ns_head_ops; head->disk->private_data = head; + + /* + * We need to suppress the partition scan from occuring within the + * controller's scan_work context. If a path error occurs here, the IO + * will wait until a path becomes available or all paths are torn down, + * but that action also occurs within scan_work, so it would deadlock. + * Defer the partion scan to a different context that does not block + * scan_work. + */ + set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); @@ -552,6 +577,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) return; } nvme_add_ns_head_cdev(head); + kblockd_schedule_work(&head->partition_scan_work); } mutex_lock(&head->lock); @@ -851,6 +877,12 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) kblockd_schedule_work(&head->requeue_work); if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { nvme_cdev_del(&head->cdev, &head->cdev_device); + /* + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared + * to allow multipath to fail all I/O. + */ + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); del_gendisk(head->disk); } } @@ -862,6 +894,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); + flush_work(&head->partition_scan_work); put_disk(head->disk); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 5f8a146b7014..0f49b779dec6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -460,6 +460,7 @@ struct nvme_ns_head { struct bio_list requeue_list; spinlock_t requeue_lock; struct work_struct requeue_work; + struct work_struct partition_scan_work; struct mutex lock; unsigned long flags; #define NVME_NSHEAD_DISK_LIVE 0 From ffdebf3da116904baf24ab42fef0c88496446b67 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 20 Sep 2024 19:35:20 +1000 Subject: [PATCH 154/174] powerpc/powernv: Free name on error in opal_event_init() [ Upstream commit cf8989d20d64ad702a6210c11a0347ebf3852aa7 ] In opal_event_init() if request_irq() fails name is not freed, leading to a memory leak. The code only runs at boot time, there's no way for a user to trigger it, so there's no security impact. Fix the leak by freeing name in the error path. Reported-by: 2639161967 <2639161967@qq.com> Closes: https://lore.kernel.org/linuxppc-dev/87wmjp3wig.fsf@mail.lhotse Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240920093520.67997-1-mpe@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/opal-irqchip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 391f50535200..e9849d70aee4 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -282,6 +282,7 @@ int __init opal_event_init(void) name, NULL); if (rc) { pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); + kfree(name); continue; } } From afa229465399f89d3af9d72ced865144c9748846 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 16 Oct 2024 08:33:15 +0530 Subject: [PATCH 155/174] nvme: make keep-alive synchronous operation [ Upstream commit d06923670b5a5f609603d4a9fee4dec02d38de9c ] The nvme keep-alive operation, which executes at a periodic interval, could potentially sneak in while shutting down a fabric controller. This may lead to a race between the fabric controller admin queue destroy code path (invoked while shutting down controller) and hw/hctx queue dispatcher called from the nvme keep-alive async request queuing operation. This race could lead to the kernel crash shown below: Call Trace: autoremove_wake_function+0x0/0xbc (unreliable) __blk_mq_sched_dispatch_requests+0x114/0x24c blk_mq_sched_dispatch_requests+0x44/0x84 blk_mq_run_hw_queue+0x140/0x220 nvme_keep_alive_work+0xc8/0x19c [nvme_core] process_one_work+0x200/0x4e0 worker_thread+0x340/0x504 kthread+0x138/0x140 start_kernel_thread+0x14/0x18 While shutting down fabric controller, if nvme keep-alive request sneaks in then it would be flushed off. The nvme_keep_alive_end_io function is then invoked to handle the end of the keep-alive operation which decrements the admin->q_usage_counter and assuming this is the last/only request in the admin queue then the admin->q_usage_counter becomes zero. If that happens then blk-mq destroy queue operation (blk_mq_destroy_ queue()) which could be potentially running simultaneously on another cpu (as this is the controller shutdown code path) would forward progress and deletes the admin queue. So, now from this point onward we are not supposed to access the admin queue resources. However the issue here's that the nvme keep-alive thread running hw/hctx queue dispatch operation hasn't yet finished its work and so it could still potentially access the admin queue resource while the admin queue had been already deleted and that causes the above crash. This fix helps avoid the observed crash by implementing keep-alive as a synchronous operation so that we decrement admin->q_usage_counter only after keep-alive command finished its execution and returns the command status back up to its caller (blk_execute_rq()). This would ensure that fabric shutdown code path doesn't destroy the fabric admin queue until keep-alive request finished execution and also keep-alive thread is not running hw/hctx queue dispatch operation. Reviewed-by: Christoph Hellwig Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc25d9189132..92ffeb660561 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1231,10 +1231,9 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) nvme_keep_alive_work_period(ctrl)); } -static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, - blk_status_t status) +static void nvme_keep_alive_finish(struct request *rq, + blk_status_t status, struct nvme_ctrl *ctrl) { - struct nvme_ctrl *ctrl = rq->end_io_data; unsigned long flags; bool startka = false; unsigned long rtt = jiffies - (rq->deadline - rq->timeout); @@ -1252,13 +1251,11 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, delay = 0; } - blk_mq_free_request(rq); - if (status) { dev_err(ctrl->device, "failed nvme_keep_alive_end_io error=%d\n", status); - return RQ_END_IO_NONE; + return; } ctrl->ka_last_check_time = jiffies; @@ -1270,7 +1267,6 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); - return RQ_END_IO_NONE; } static void nvme_keep_alive_work(struct work_struct *work) @@ -1279,6 +1275,7 @@ static void nvme_keep_alive_work(struct work_struct *work) struct nvme_ctrl, ka_work); bool comp_seen = ctrl->comp_seen; struct request *rq; + blk_status_t status; ctrl->ka_last_check_time = jiffies; @@ -1301,9 +1298,9 @@ static void nvme_keep_alive_work(struct work_struct *work) nvme_init_request(rq, &ctrl->ka_cmd); rq->timeout = ctrl->kato * HZ; - rq->end_io = nvme_keep_alive_end_io; - rq->end_io_data = ctrl; - blk_execute_rq_nowait(rq, false); + status = blk_execute_rq(rq, false); + nvme_keep_alive_finish(rq, status, ctrl); + blk_mq_free_request(rq); } static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) From 32c982b520a5fe7db66bb44eea1e686cd103ade8 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 17 Oct 2024 09:38:12 +0800 Subject: [PATCH 156/174] vDPA/ifcvf: Fix pci_read_config_byte() return code handling [ Upstream commit 7f8825b2a78ac392d3fbb3a2e65e56d9e39d75e9 ] ifcvf_init_hw() uses pci_read_config_byte() that returns PCIBIOS_* codes. The error handling, however, assumes the codes are normal errnos because it checks for < 0. Convert the error check to plain non-zero check. Fixes: 5a2414bc454e ("virtio: Intel IFC VF driver for VDPA") Signed-off-by: Yuan Can Message-Id: <20241017013812.129952-1-yuancan@huawei.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Zhu Lingshan Signed-off-by: Sasha Levin --- drivers/vdpa/ifcvf/ifcvf_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 3ec5ca3aefe1..c80cb72b0649 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -78,7 +78,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) u32 i; ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); - if (ret < 0) { + if (ret) { IFCVF_ERR(pdev, "Failed to read PCI capability list\n"); return -EIO; } From cdd28621bbc6eb117fd81ac61443b974c26cd86b Mon Sep 17 00:00:00 2001 From: Jiawei Ye Date: Fri, 8 Nov 2024 08:18:52 +0000 Subject: [PATCH 157/174] bpf: Fix mismatched RCU unlock flavour in bpf_out_neigh_v6 [ Upstream commit fb86c42a2a5d44e849ddfbc98b8d2f4f40d36ee3 ] In the bpf_out_neigh_v6 function, rcu_read_lock() is used to begin an RCU read-side critical section. However, when unlocking, one branch incorrectly uses a different RCU unlock flavour rcu_read_unlock_bh() instead of rcu_read_unlock(). This mismatch in RCU locking flavours can lead to unexpected behavior and potential concurrency issues. This possible bug was identified using a static analysis tool developed by myself, specifically designed to detect RCU-related issues. This patch corrects the mismatched unlock flavour by replacing the incorrect rcu_read_unlock_bh() with the appropriate rcu_read_unlock(), ensuring that the RCU critical section is properly exited. This change prevents potential synchronization issues and aligns with proper RCU usage patterns. Fixes: 09eed1192cec ("neighbour: switch to standard rcu, instead of rcu_bh") Signed-off-by: Jiawei Ye Acked-by: Yonghong Song Link: https://lore.kernel.org/r/tencent_CFD3D1C3D68B45EA9F52D8EC76D2C4134306@qq.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 3f3286cf438e..2f6fef5f5864 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2226,7 +2226,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); if (dst) IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: From 5a72b0d3497b818d8f000c347a7c11801eb27bfc Mon Sep 17 00:00:00 2001 From: Alessandro Zanni Date: Thu, 17 Oct 2024 14:05:51 +0200 Subject: [PATCH 158/174] fs: Fix uninitialized value issue in from_kuid and from_kgid [ Upstream commit 15f34347481648a567db67fb473c23befb796af5 ] ocfs2_setattr() uses attr->ia_mode, attr->ia_uid and attr->ia_gid in a trace point even though ATTR_MODE, ATTR_UID and ATTR_GID aren't set. Initialize all fields of newattrs to avoid uninitialized variables, by checking if ATTR_MODE, ATTR_UID, ATTR_GID are initialized, otherwise 0. Reported-by: syzbot+6c55f725d1bdc8c52058@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6c55f725d1bdc8c52058 Signed-off-by: Alessandro Zanni Link: https://lore.kernel.org/r/20241017120553.55331-1-alessandro.zanni87@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/ocfs2/file.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ea7c79e8ce42..e96b947c3f5d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1129,9 +1129,12 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, trace_ocfs2_setattr(inode, dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, dentry->d_name.len, dentry->d_name.name, - attr->ia_valid, attr->ia_mode, - from_kuid(&init_user_ns, attr->ia_uid), - from_kgid(&init_user_ns, attr->ia_gid)); + attr->ia_valid, + attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0, + attr->ia_valid & ATTR_UID ? + from_kuid(&init_user_ns, attr->ia_uid) : 0, + attr->ia_valid & ATTR_GID ? + from_kgid(&init_user_ns, attr->ia_gid) : 0); /* ensuring we don't even attempt to truncate a symlink */ if (S_ISLNK(inode->i_mode)) From 5622881cf00196a63db84aed32cd3ad9200715a2 Mon Sep 17 00:00:00 2001 From: Kenneth Albanowski Date: Fri, 4 Oct 2024 10:24:29 -0700 Subject: [PATCH 159/174] HID: multitouch: Add quirk for Logitech Bolt receiver w/ Casa touchpad [ Upstream commit 526748b925185e95f1415900ee13c2469d4b64cc ] The Logitech Casa Touchpad does not reliably send touch release signals when communicating through the Logitech Bolt wireless-to-USB receiver. Adjusting the device class to add MT_QUIRK_NOT_SEEN_MEANS_UP to make sure that no touches become stuck, MT_QUIRK_FORCE_MULTI_INPUT is not needed, but harmless. Linux does not have information on which devices are connected to the Bolt receiver, so we have to enable this for the entire device. Signed-off-by: Kenneth Albanowski Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-multitouch.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f3b183a7b7fa..f1c106f5e90b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -859,6 +859,7 @@ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1 0xc539 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1 0xc53f #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a +#define USB_DEVICE_ID_LOGITECH_BOLT_RECEIVER 0xc548 #define USB_DEVICE_ID_SPACETRAVELLER 0xc623 #define USB_DEVICE_ID_SPACENAVIGATOR 0xc626 #define USB_DEVICE_ID_DINOVO_DESKTOP 0xc704 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index c2d79b2d6cdd..bf9cad711259 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2140,6 +2140,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CASA_TOUCHPAD) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LOGITECH, + USB_DEVICE_ID_LOGITECH_BOLT_RECEIVER) }, /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, From 807692abc6906cc94125a8ef6cf0934686e0dad4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Oct 2024 11:45:12 +0200 Subject: [PATCH 160/174] HID: lenovo: Add support for Thinkpad X1 Tablet Gen 3 keyboard [ Upstream commit 51268879eb2bfc563a91cdce69362d9dbf707e7e ] The Thinkpad X1 Tablet Gen 3 keyboard has the same Lenovo specific quirks as the original Thinkpad X1 Tablet keyboard. Add the PID for the "Thinkpad X1 Tablet Gen 3 keyboard" to the hid-lenovo driver to fix the FnLock, Mute and media buttons not working. Suggested-by: Izhar Firdaus Closes https://bugzilla.redhat.com/show_bug.cgi?id=2315395 Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-lenovo.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index f86c1ea83a03..a4062f617ba2 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -473,6 +473,7 @@ static int lenovo_input_mapping(struct hid_device *hdev, return lenovo_input_mapping_tp10_ultrabook_kbd(hdev, hi, field, usage, bit, max); case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_input_mapping_x1_tab_kbd(hdev, hi, field, usage, bit, max); default: return 0; @@ -583,6 +584,7 @@ static ssize_t attr_fn_lock_store(struct device *dev, break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, value); if (ret) return ret; @@ -777,6 +779,7 @@ static int lenovo_event(struct hid_device *hdev, struct hid_field *field, return lenovo_event_cptkbd(hdev, field, usage, value); case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_event_tp10ubkbd(hdev, field, usage, value); default: return 0; @@ -1059,6 +1062,7 @@ static int lenovo_led_brightness_set(struct led_classdev *led_cdev, break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, tp10ubkbd_led[led_nr], value); break; } @@ -1289,6 +1293,7 @@ static int lenovo_probe(struct hid_device *hdev, break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_probe_tp10ubkbd(hdev); break; default: @@ -1375,6 +1380,7 @@ static void lenovo_remove(struct hid_device *hdev) break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB3: lenovo_remove_tp10ubkbd(hdev); break; } @@ -1424,6 +1430,8 @@ static const struct hid_device_id lenovo_devices[] = { */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, { } }; From 8118551b4ffa6bf524250d24f432097749bdae70 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 21 Oct 2024 22:11:18 +0800 Subject: [PATCH 161/174] LoongArch: Use "Exception return address" to comment ERA [ Upstream commit b69269c870ece1bc7d2e3e39ca76f4602f2cb0dd ] The information contained in the comment for LOONGARCH_CSR_ERA is even less informative than the macro itself, which can cause confusion for junior developers. Let's use the full English term. Signed-off-by: Yanteng Si Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/loongarch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 3d15fa5bef37..710b005fc8a6 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -325,7 +325,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ESTAT_IS_WIDTH 15 #define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT) -#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_ERA 0x6 /* Exception return address */ #define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ From d4b003f7203eba1f6195383480665655e3c6c074 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Fri, 18 Oct 2024 22:52:55 +0200 Subject: [PATCH 162/174] net: usb: qmi_wwan: add Fibocom FG132 0x0112 composition [ Upstream commit 64761c980cbf71fb7a532a8c7299907ea972a88c ] Add Fibocom FG132 0x0112 composition: T: Bus=03 Lev=02 Prnt=06 Port=01 Cnt=02 Dev#= 10 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0112 Rev= 5.15 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom Module S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms Signed-off-by: Reinhard Speyerer Link: https://patch.msgid.link/ZxLKp5YZDy-OM0-e@arcor.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 72a2c41b9dbf..fe9abc4ea3af 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1431,6 +1431,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ + {QMI_QUIRK_SET_DTR(0x2cb7, 0x0112, 0)}, /* Fibocom FG132 */ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ From 9478355c63099f1fb0b0674d74a402933c0d8f7f Mon Sep 17 00:00:00 2001 From: Li Nan Date: Sat, 27 May 2023 15:22:16 +0800 Subject: [PATCH 163/174] md/raid10: improve code of mrdev in raid10_sync_request commit 59f8f0b54c8ffb4521f6bbd1cb6f4dfa5022e75e upstream. 'need_recover' and 'mrdev' are equivalent in raid10_sync_request(), and inc mrdev->nr_pending is unreasonable if don't need recovery. Replace 'need_recover' with 'mrdev', and only inc nr_pending when needed. Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230527072218.2365857-3-linan666@huaweicloud.com Cc: Hagar Gamal Halim Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 009f7ffe4e10..f608f30c7d6a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3432,7 +3432,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, sector_t sect; int must_sync; int any_working; - int need_recover = 0; struct raid10_info *mirror = &conf->mirrors[i]; struct md_rdev *mrdev, *mreplace; @@ -3440,14 +3439,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, mrdev = rcu_dereference(mirror->rdev); mreplace = rcu_dereference(mirror->replacement); - if (mrdev != NULL && - !test_bit(Faulty, &mrdev->flags) && - !test_bit(In_sync, &mrdev->flags)) - need_recover = 1; + if (mrdev && (test_bit(Faulty, &mrdev->flags) || + test_bit(In_sync, &mrdev->flags))) + mrdev = NULL; if (mreplace && test_bit(Faulty, &mreplace->flags)) mreplace = NULL; - if (!need_recover && !mreplace) { + if (!mrdev && !mreplace) { rcu_read_unlock(); continue; } @@ -3481,7 +3479,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, rcu_read_unlock(); continue; } - atomic_inc(&mrdev->nr_pending); + if (mrdev) + atomic_inc(&mrdev->nr_pending); if (mreplace) atomic_inc(&mreplace->nr_pending); rcu_read_unlock(); @@ -3568,7 +3567,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->devs[1].devnum = i; r10_bio->devs[1].addr = to_addr; - if (need_recover) { + if (mrdev) { bio = r10_bio->devs[1].bio; bio->bi_next = biolist; biolist = bio; @@ -3613,7 +3612,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, for (k = 0; k < conf->copies; k++) if (r10_bio->devs[k].devnum == i) break; - if (!test_bit(In_sync, + if (mrdev && !test_bit(In_sync, &mrdev->flags) && !rdev_set_badblocks( mrdev, @@ -3639,12 +3638,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (rb2) atomic_dec(&rb2->remaining); r10_bio = rb2; - rdev_dec_pending(mrdev, mddev); + if (mrdev) + rdev_dec_pending(mrdev, mddev); if (mreplace) rdev_dec_pending(mreplace, mddev); break; } - rdev_dec_pending(mrdev, mddev); + if (mrdev) + rdev_dec_pending(mrdev, mddev); if (mreplace) rdev_dec_pending(mreplace, mddev); if (r10_bio->devs[0].bio->bi_opf & MD_FAILFAST) { From fdacd09f2ddf7a00787291f08ee48c0421e5b709 Mon Sep 17 00:00:00 2001 From: Hagar Hemdan Date: Tue, 4 Jun 2024 13:05:27 +0000 Subject: [PATCH 164/174] io_uring: fix possible deadlock in io_register_iowq_max_workers() commit 73254a297c2dd094abec7c9efee32455ae875bdf upstream. The io_register_iowq_max_workers() function calls io_put_sq_data(), which acquires the sqd->lock without releasing the uring_lock. Similar to the commit 009ad9f0c6ee ("io_uring: drop ctx->uring_lock before acquiring sqd->lock"), this can lead to a potential deadlock situation. To resolve this issue, the uring_lock is released before calling io_put_sq_data(), and then it is re-acquired after the function call. This change ensures that the locks are acquired in the correct order, preventing the possibility of a deadlock. Suggested-by: Maximilian Heyne Signed-off-by: Hagar Hemdan Link: https://lore.kernel.org/r/20240604130527.3597-1-hagarhem@amazon.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 92c1aa8f3501..4f0ae938b146 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3921,8 +3921,10 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, } if (sqd) { + mutex_unlock(&ctx->uring_lock); mutex_unlock(&sqd->lock); io_put_sq_data(sqd); + mutex_lock(&ctx->uring_lock); } if (copy_to_user(arg, new_count, sizeof(new_count))) @@ -3947,8 +3949,11 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, return 0; err: if (sqd) { + mutex_unlock(&ctx->uring_lock); mutex_unlock(&sqd->lock); io_put_sq_data(sqd); + mutex_lock(&ctx->uring_lock); + } return ret; } From 02079f092250f031203b7d5674368dbdeebc38a4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Mar 2024 11:17:26 -0700 Subject: [PATCH 165/174] uprobes: encapsulate preparation of uprobe args buffer commit 3eaea21b4d27cff0017c20549aeb53034c58fc23 upstream. Move the logic of fetching temporary per-CPU uprobe buffer and storing uprobes args into it to a new helper function. Store data size as part of this buffer, simplifying interfaces a bit, as now we only pass single uprobe_cpu_buffer reference around, instead of pointer + dsize. This logic was duplicated across uprobe_dispatcher and uretprobe_dispatcher, and now will be centralized. All this is also in preparation to make this uprobe_cpu_buffer handling logic optional in the next patch. Link: https://lore.kernel.org/all/20240318181728.2795838-2-andrii@kernel.org/ [Masami: update for v6.9-rc3 kernel] Signed-off-by: Andrii Nakryiko Reviewed-by: Jiri Olsa Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args") Signed-off-by: Sasha Levin Signed-off-by: Vamsi Krishna Brahmajosyula Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 79 +++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 127c78aec17d..e09eef65d32f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -858,6 +858,7 @@ static const struct file_operations uprobe_profile_ops = { struct uprobe_cpu_buffer { struct mutex mutex; void *buf; + int dsize; }; static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; static int uprobe_buffer_refcnt; @@ -947,9 +948,26 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) mutex_unlock(&ucb->mutex); } +static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, + struct pt_regs *regs) +{ + struct uprobe_cpu_buffer *ucb; + int dsize, esize; + + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(&tu->tp, regs); + + ucb = uprobe_buffer_get(); + ucb->dsize = tu->tp.size + dsize; + + store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); + + return ucb; +} + static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize, + struct uprobe_cpu_buffer *ucb, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; @@ -960,14 +978,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, WARN_ON(call != trace_file->event_call); - if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) + if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) return; if (trace_trigger_soft_disabled(trace_file)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - size = esize + tu->tp.size + dsize; + size = esize + ucb->dsize; entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); if (!entry) return; @@ -981,14 +999,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - memcpy(data, ucb->buf, tu->tp.size + dsize); + memcpy(data, ucb->buf, ucb->dsize); trace_event_buffer_commit(&fbuffer); } /* uprobe handler */ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { struct event_file_link *link; @@ -997,7 +1015,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); + __uprobe_trace_func(tu, 0, regs, ucb, link->file); rcu_read_unlock(); return 0; @@ -1005,13 +1023,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { struct event_file_link *link; rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) - __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); + __uprobe_trace_func(tu, func, regs, ucb, link->file); rcu_read_unlock(); } @@ -1339,7 +1357,7 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); struct uprobe_trace_entry_head *entry; @@ -1360,7 +1378,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - size = esize + tu->tp.size + dsize; + size = esize + ucb->dsize; size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; @@ -1383,13 +1401,10 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - memcpy(data, ucb->buf, tu->tp.size + dsize); + memcpy(data, ucb->buf, ucb->dsize); - if (size - esize > tu->tp.size + dsize) { - int len = tu->tp.size + dsize; - - memset(data + len, 0, size - esize - len); - } + if (size - esize > ucb->dsize) + memset(data + ucb->dsize, 0, size - esize - ucb->dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); @@ -1399,21 +1414,21 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, /* uprobe profile handler */ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) - __uprobe_perf_func(tu, 0, regs, ucb, dsize); + __uprobe_perf_func(tu, 0, regs, ucb); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, - struct uprobe_cpu_buffer *ucb, int dsize) + struct uprobe_cpu_buffer *ucb) { - __uprobe_perf_func(tu, func, regs, ucb, dsize); + __uprobe_perf_func(tu, func, regs, ucb); } int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, @@ -1479,10 +1494,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb; - int dsize, esize; int ret = 0; - tu = container_of(con, struct trace_uprobe, consumer); tu->nhit++; @@ -1494,18 +1507,14 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs); - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - - ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); + ucb = prepare_uprobe_buffer(tu, regs); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) - ret |= uprobe_trace_func(tu, regs, ucb, dsize); + ret |= uprobe_trace_func(tu, regs, ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) - ret |= uprobe_perf_func(tu, regs, ucb, dsize); + ret |= uprobe_perf_func(tu, regs, ucb); #endif uprobe_buffer_put(ucb); return ret; @@ -1517,7 +1526,6 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb; - int dsize, esize; tu = container_of(con, struct trace_uprobe, consumer); @@ -1529,18 +1537,13 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - dsize = __get_data_size(&tu->tp, regs); - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); - - ucb = uprobe_buffer_get(); - store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); - + ucb = prepare_uprobe_buffer(tu, regs); if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) - uretprobe_trace_func(tu, func, regs, ucb, dsize); + uretprobe_trace_func(tu, func, regs, ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) - uretprobe_perf_func(tu, func, regs, ucb, dsize); + uretprobe_perf_func(tu, func, regs, ucb); #endif uprobe_buffer_put(ucb); return 0; From 0dc3ad9ad2188da7f090b3dbe4d2fcd9ae8ae64f Mon Sep 17 00:00:00 2001 From: Qiao Ma Date: Tue, 15 Oct 2024 14:01:48 +0800 Subject: [PATCH 166/174] uprobe: avoid out-of-bounds memory access of fetching args commit 373b9338c9722a368925d83bc622c596896b328e upstream. Uprobe needs to fetch args into a percpu buffer, and then copy to ring buffer to avoid non-atomic context problem. Sometimes user-space strings, arrays can be very large, but the size of percpu buffer is only page size. And store_trace_args() won't check whether these data exceeds a single page or not, caused out-of-bounds memory access. It could be reproduced by following steps: 1. build kernel with CONFIG_KASAN enabled 2. save follow program as test.c ``` \#include \#include \#include // If string length large than MAX_STRING_SIZE, the fetch_store_strlen() // will return 0, cause __get_data_size() return shorter size, and // store_trace_args() will not trigger out-of-bounds access. // So make string length less than 4096. \#define STRLEN 4093 void generate_string(char *str, int n) { int i; for (i = 0; i < n; ++i) { char c = i % 26 + 'a'; str[i] = c; } str[n-1] = '\0'; } void print_string(char *str) { printf("%s\n", str); } int main() { char tmp[STRLEN]; generate_string(tmp, STRLEN); print_string(tmp); return 0; } ``` 3. compile program `gcc -o test test.c` 4. get the offset of `print_string()` ``` objdump -t test | grep -w print_string 0000000000401199 g F .text 000000000000001b print_string ``` 5. configure uprobe with offset 0x1199 ``` off=0x1199 cd /sys/kernel/debug/tracing/ echo "p /root/test:${off} arg1=+0(%di):ustring arg2=\$comm arg3=+0(%di):ustring" > uprobe_events echo 1 > events/uprobes/enable echo 1 > tracing_on ``` 6. run `test`, and kasan will report error. ================================================================== BUG: KASAN: use-after-free in strncpy_from_user+0x1d6/0x1f0 Write of size 8 at addr ffff88812311c004 by task test/499CPU: 0 UID: 0 PID: 499 Comm: test Not tainted 6.12.0-rc3+ #18 Hardware name: Red Hat KVM, BIOS 1.16.0-4.al8 04/01/2014 Call Trace: dump_stack_lvl+0x55/0x70 print_address_description.constprop.0+0x27/0x310 kasan_report+0x10f/0x120 ? strncpy_from_user+0x1d6/0x1f0 strncpy_from_user+0x1d6/0x1f0 ? rmqueue.constprop.0+0x70d/0x2ad0 process_fetch_insn+0xb26/0x1470 ? __pfx_process_fetch_insn+0x10/0x10 ? _raw_spin_lock+0x85/0xe0 ? __pfx__raw_spin_lock+0x10/0x10 ? __pte_offset_map+0x1f/0x2d0 ? unwind_next_frame+0xc5f/0x1f80 ? arch_stack_walk+0x68/0xf0 ? is_bpf_text_address+0x23/0x30 ? kernel_text_address.part.0+0xbb/0xd0 ? __kernel_text_address+0x66/0xb0 ? unwind_get_return_address+0x5e/0xa0 ? __pfx_stack_trace_consume_entry+0x10/0x10 ? arch_stack_walk+0xa2/0xf0 ? _raw_spin_lock_irqsave+0x8b/0xf0 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? depot_alloc_stack+0x4c/0x1f0 ? _raw_spin_unlock_irqrestore+0xe/0x30 ? stack_depot_save_flags+0x35d/0x4f0 ? kasan_save_stack+0x34/0x50 ? kasan_save_stack+0x24/0x50 ? mutex_lock+0x91/0xe0 ? __pfx_mutex_lock+0x10/0x10 prepare_uprobe_buffer.part.0+0x2cd/0x500 uprobe_dispatcher+0x2c3/0x6a0 ? __pfx_uprobe_dispatcher+0x10/0x10 ? __kasan_slab_alloc+0x4d/0x90 handler_chain+0xdd/0x3e0 handle_swbp+0x26e/0x3d0 ? __pfx_handle_swbp+0x10/0x10 ? uprobe_pre_sstep_notifier+0x151/0x1b0 irqentry_exit_to_user_mode+0xe2/0x1b0 asm_exc_int3+0x39/0x40 RIP: 0033:0x401199 Code: 01 c2 0f b6 45 fb 88 02 83 45 fc 01 8b 45 fc 3b 45 e4 7c b7 8b 45 e4 48 98 48 8d 50 ff 48 8b 45 e8 48 01 d0 ce RSP: 002b:00007ffdf00576a8 EFLAGS: 00000206 RAX: 00007ffdf00576b0 RBX: 0000000000000000 RCX: 0000000000000ff2 RDX: 0000000000000ffc RSI: 0000000000000ffd RDI: 00007ffdf00576b0 RBP: 00007ffdf00586b0 R08: 00007feb2f9c0d20 R09: 00007feb2f9c0d20 R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000401040 R13: 00007ffdf0058780 R14: 0000000000000000 R15: 0000000000000000 This commit enforces the buffer's maxlen less than a page-size to avoid store_trace_args() out-of-memory access. Link: https://lore.kernel.org/all/20241015060148.1108331-1-mqaio@linux.alibaba.com/ Fixes: dcad1a204f72 ("tracing/uprobes: Fetch args before reserving a ring buffer") Signed-off-by: Qiao Ma Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin Signed-off-by: Vamsi Krishna Brahmajosyula Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_uprobe.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e09eef65d32f..a6a3ff2a441e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -862,6 +862,7 @@ struct uprobe_cpu_buffer { }; static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; static int uprobe_buffer_refcnt; +#define MAX_UCB_BUFFER_SIZE PAGE_SIZE static int uprobe_buffer_init(void) { @@ -960,6 +961,11 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, ucb = uprobe_buffer_get(); ucb->dsize = tu->tp.size + dsize; + if (WARN_ON_ONCE(ucb->dsize > MAX_UCB_BUFFER_SIZE)) { + ucb->dsize = MAX_UCB_BUFFER_SIZE; + dsize = MAX_UCB_BUFFER_SIZE - tu->tp.size; + } + store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize); return ucb; @@ -978,9 +984,6 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, WARN_ON(call != trace_file->event_call); - if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) - return; - if (trace_trigger_soft_disabled(trace_file)) return; From e7831613cbbcd9058d3658fbcdc5d5884ceb2e0c Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Sun, 14 Jul 2024 11:11:05 -0400 Subject: [PATCH 167/174] drm/amdkfd: amdkfd_free_gtt_mem clear the correct pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c86ad39140bbcb9dc75a10046c2221f657e8083b upstream. Pass pointer reference to amdgpu_bo_unref to clear the correct pointer, otherwise amdgpu_bo_unref clear the local variable, the original pointer not set to NULL, this could cause use-after-free bug. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Vamsi Krishna Brahmajosyula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5d9a34601a1a..c31e5f9d63da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -344,15 +344,15 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { - struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; + struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(bo, true); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unpin(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&(bo)); + amdgpu_bo_reserve(*bo, true); + amdgpu_bo_kunmap(*bo); + amdgpu_bo_unpin(*bo); + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); } int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4b694886715c..c7672a1d1560 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -210,7 +210,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj); void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e3cd66c4d95d..f83574107eb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -408,7 +408,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err_create_queue: if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&wptr_bo); err_wptr_map_gart: err_alloc_doorbells: err_bind_process: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 27820f0a282d..e2c055abfea9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -673,7 +673,7 @@ kfd_interrupt_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); alloc_gtt_mem_failure: if (kfd->gws) amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); @@ -693,7 +693,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); if (kfd->gws) amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1b7b29426480..3ab0a796af06 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2392,7 +2392,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 623ccd227b7d..c733d6888c30 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -204,7 +204,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 5bca6abd55ae..9582c9449fff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1052,7 +1052,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->dev->shared_resources.enable_mes) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, - pdd->proc_ctx_bo); + &pdd->proc_ctx_bo); /* * before destroying pdd, make sure to report availability * for auto suspend diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 99aa8a8399d6..1918a3c06ac8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -441,9 +441,9 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (dev->shared_resources.enable_mes) { amdgpu_amdkfd_free_gtt_mem(dev->adev, - pqn->q->gang_ctx_bo); + &pqn->q->gang_ctx_bo); if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo); } uninit_queue(pqn->q); From cf3196e5e2f36cd80dab91ffae402e13935724bc Mon Sep 17 00:00:00 2001 From: Xiaxi Shen Date: Sun, 14 Jul 2024 21:33:36 -0700 Subject: [PATCH 168/174] ext4: fix timer use-after-free on failed mount commit 0ce160c5bdb67081a62293028dc85758a8efb22a upstream. Syzbot has found an ODEBUG bug in ext4_fill_super The del_timer_sync function cancels the s_err_report timer, which reminds about filesystem errors daily. We should guarantee the timer is no longer active before kfree(sbi). When filesystem mounting fails, the flow goes to failed_mount3, where an error occurs when ext4_stop_mmpd is called, causing a read I/O failure. This triggers the ext4_handle_error function that ultimately re-arms the timer, leaving the s_err_report timer active before kfree(sbi) is called. Fix the issue by canceling the s_err_report timer after calling ext4_stop_mmpd. Signed-off-by: Xiaxi Shen Reported-and-tested-by: syzbot+59e0101c430934bc9a36@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=59e0101c430934bc9a36 Link: https://patch.msgid.link/20240715043336.98097-1-shenxiaxi26@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3bf214d4afef..987d49e18dbe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5617,8 +5617,8 @@ failed_mount3a: failed_mount3: /* flush s_error_work before sbi destroy */ flush_work(&sbi->s_error_work); - del_timer_sync(&sbi->s_err_report); ext4_stop_mmpd(sbi); + del_timer_sync(&sbi->s_err_report); ext4_group_desc_free(sbi); failed_mount: if (sbi->s_chksum_driver) From b22346eec479a30bfa4a02ad2c551b54809694d0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 23 Sep 2024 12:47:39 -0400 Subject: [PATCH 169/174] Bluetooth: L2CAP: Fix uaf in l2cap_connect commit 333b4fd11e89b29c84c269123f871883a30be586 upstream. [Syzbot reported] BUG: KASAN: slab-use-after-free in l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 Read of size 8 at addr ffff8880241e9800 by task kworker/u9:0/54 CPU: 0 UID: 0 PID: 54 Comm: kworker/u9:0 Not tainted 6.11.0-rc6-syzkaller-00268-g788220eee30d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Workqueue: hci2 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 l2cap_connect_req net/bluetooth/l2cap_core.c:4080 [inline] l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:4772 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:5543 [inline] l2cap_recv_frame+0xf0b/0x8eb0 net/bluetooth/l2cap_core.c:6825 l2cap_recv_acldata+0x9b4/0xb70 net/bluetooth/l2cap_core.c:7514 hci_acldata_packet net/bluetooth/hci_core.c:3791 [inline] hci_rx_work+0xaab/0x1610 net/bluetooth/hci_core.c:4028 process_one_work+0x9c5/0x1b40 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xed0 kernel/workqueue.c:3389 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 ... Freed by task 5245: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:579 poison_slab_object+0xf7/0x160 mm/kasan/common.c:240 __kasan_slab_free+0x32/0x50 mm/kasan/common.c:256 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2256 [inline] slab_free mm/slub.c:4477 [inline] kfree+0x12a/0x3b0 mm/slub.c:4598 l2cap_conn_free net/bluetooth/l2cap_core.c:1810 [inline] kref_put include/linux/kref.h:65 [inline] l2cap_conn_put net/bluetooth/l2cap_core.c:1822 [inline] l2cap_conn_del+0x59d/0x730 net/bluetooth/l2cap_core.c:1802 l2cap_connect_cfm+0x9e6/0xf80 net/bluetooth/l2cap_core.c:7241 hci_connect_cfm include/net/bluetooth/hci_core.h:1960 [inline] hci_conn_failed+0x1c3/0x370 net/bluetooth/hci_conn.c:1265 hci_abort_conn_sync+0x75a/0xb50 net/bluetooth/hci_sync.c:5583 abort_conn_sync+0x197/0x360 net/bluetooth/hci_conn.c:2917 hci_cmd_sync_work+0x1a4/0x410 net/bluetooth/hci_sync.c:328 process_one_work+0x9c5/0x1b40 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xed0 kernel/workqueue.c:3389 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Reported-by: syzbot+c12e2f941af1feb5632c@syzkaller.appspotmail.com Tested-by: syzbot+c12e2f941af1feb5632c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c12e2f941af1feb5632c Fixes: 7b064edae38d ("Bluetooth: Fix authentication if acl data comes before remote feature evt") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin [Xiangyu: Modified to bp this commit to fix CVE-2024-49950] Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 2 ++ net/bluetooth/hci_event.c | 2 +- net/bluetooth/l2cap_core.c | 9 --------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 789f7f4a0908..f93f3e7a3d90 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3846,6 +3846,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, handle); + if (conn && hci_dev_test_flag(hdev, HCI_MGMT)) + mgmt_device_connected(hdev, conn, NULL, 0); hci_dev_unlock(hdev); if (conn) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ac9bdfac773f..7c1df481ebe9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3789,7 +3789,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (!ev->status && !test_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) { + if (!ev->status) { struct hci_cp_remote_name_req cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 209c6d458d33..187c91843876 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4300,18 +4300,9 @@ sendresp: static int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { - struct hci_dev *hdev = conn->hcon->hdev; - struct hci_conn *hcon = conn->hcon; - if (cmd_len < sizeof(struct l2cap_conn_req)) return -EPROTO; - hci_dev_lock(hdev); - if (hci_dev_test_flag(hdev, HCI_MGMT) && - !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags)) - mgmt_device_connected(hdev, hcon, NULL, 0); - hci_dev_unlock(hdev); - l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0); return 0; } From 486aeb5f1855c75dd810c25036134961bd2a6722 Mon Sep 17 00:00:00 2001 From: Qun-Wei Lin Date: Fri, 25 Oct 2024 16:58:11 +0800 Subject: [PATCH 170/174] mm: krealloc: Fix MTE false alarm in __do_krealloc commit 704573851b51808b45dae2d62059d1d8189138a2 upstream. This patch addresses an issue introduced by commit 1a83a716ec233 ("mm: krealloc: consider spare memory for __GFP_ZERO") which causes MTE (Memory Tagging Extension) to falsely report a slab-out-of-bounds error. The problem occurs when zeroing out spare memory in __do_krealloc. The original code only considered software-based KASAN and did not account for MTE. It does not reset the KASAN tag before calling memset, leading to a mismatch between the pointer tag and the memory tag, resulting in a false positive. Example of the error: ================================================================== swapper/0: BUG: KASAN: slab-out-of-bounds in __memset+0x84/0x188 swapper/0: Write at addr f4ffff8005f0fdf0 by task swapper/0/1 swapper/0: Pointer tag: [f4], memory tag: [fe] swapper/0: swapper/0: CPU: 4 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12. swapper/0: Hardware name: MT6991(ENG) (DT) swapper/0: Call trace: swapper/0: dump_backtrace+0xfc/0x17c swapper/0: show_stack+0x18/0x28 swapper/0: dump_stack_lvl+0x40/0xa0 swapper/0: print_report+0x1b8/0x71c swapper/0: kasan_report+0xec/0x14c swapper/0: __do_kernel_fault+0x60/0x29c swapper/0: do_bad_area+0x30/0xdc swapper/0: do_tag_check_fault+0x20/0x34 swapper/0: do_mem_abort+0x58/0x104 swapper/0: el1_abort+0x3c/0x5c swapper/0: el1h_64_sync_handler+0x80/0xcc swapper/0: el1h_64_sync+0x68/0x6c swapper/0: __memset+0x84/0x188 swapper/0: btf_populate_kfunc_set+0x280/0x3d8 swapper/0: __register_btf_kfunc_id_set+0x43c/0x468 swapper/0: register_btf_kfunc_id_set+0x48/0x60 swapper/0: register_nf_nat_bpf+0x1c/0x40 swapper/0: nf_nat_init+0xc0/0x128 swapper/0: do_one_initcall+0x184/0x464 swapper/0: do_initcall_level+0xdc/0x1b0 swapper/0: do_initcalls+0x70/0xc0 swapper/0: do_basic_setup+0x1c/0x28 swapper/0: kernel_init_freeable+0x144/0x1b8 swapper/0: kernel_init+0x20/0x1a8 swapper/0: ret_from_fork+0x10/0x20 ================================================================== Fixes: 1a83a716ec233 ("mm: krealloc: consider spare memory for __GFP_ZERO") Signed-off-by: Qun-Wei Lin Acked-by: David Rientjes Signed-off-by: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index a14d655bddc6..d89e0846cb16 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1325,7 +1325,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags) /* Zero out spare memory. */ if (want_init_on_alloc(flags)) { kasan_disable_current(); - memset((void *)p + new_size, 0, ks - new_size); + memset(kasan_reset_tag(p) + new_size, 0, ks - new_size); kasan_enable_current(); } From ba0b09a2f327319e252d8f3032019b958c0a5cd9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 5 Oct 2024 15:05:45 +0200 Subject: [PATCH 171/174] platform/x86: x86-android-tablets: Fix use after free on platform_device_register() errors commit 2fae3129c0c08e72b1fe93e61fd8fd203252094a upstream. x86_android_tablet_remove() frees the pdevs[] array, so it should not be used after calling x86_android_tablet_remove(). When platform_device_register() fails, store the pdevs[x] PTR_ERR() value into the local ret variable before calling x86_android_tablet_remove() to avoid using pdevs[] after it has been freed. Fixes: 5eba0141206e ("platform/x86: x86-android-tablets: Add support for instantiating platform-devs") Fixes: e2200d3f26da ("platform/x86: x86-android-tablets: Add gpio_keys support to x86_android_tablet_init()") Cc: stable@vger.kernel.org Reported-by: Aleksandr Burakov Closes: https://lore.kernel.org/platform-driver-x86/20240917120458.7300-1-a.burakov@rosalinux.ru/ Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20241005130545.64136-1-hdegoede@redhat.com [Xiangyu: Modified file path to backport this commit to fix CVE: CVE-2024-49986] Signed-off-by: Xiangyu Chen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/x86-android-tablets.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 9178076d9d7d..94710471d7dd 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -1853,8 +1853,9 @@ static __init int x86_android_tablet_init(void) for (i = 0; i < pdev_count; i++) { pdevs[i] = platform_device_register_full(&dev_info->pdev_info[i]); if (IS_ERR(pdevs[i])) { + ret = PTR_ERR(pdevs[i]); x86_android_tablet_cleanup(); - return PTR_ERR(pdevs[i]); + return ret; } } From 509c1c6b499a4d9026b58c6e1c3a10ed8db1839f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Aug 2024 13:50:18 +0300 Subject: [PATCH 172/174] fs/ntfs3: Fix general protection fault in run_is_mapped_full commit a33fb016e49e37aafab18dc3c8314d6399cb4727 upstream. Fixed deleating of a non-resident attribute in ntfs_create_inode() rollback. Reported-by: syzbot+9af29acd8f27fbce94bc@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov Signed-off-by: Bin Lan Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 026ed43c0670..057aa3cec902 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1646,6 +1646,15 @@ out7: le16_to_cpu(new_de->key_size), sbi); /* ni_unlock(dir_ni); will be called later. */ out6: + attr = ni_find_attr(ni, NULL, NULL, ATTR_EA, NULL, 0, NULL, NULL); + if (attr && attr->non_res) { + /* Delete ATTR_EA, if non-resident. */ + struct runs_tree run; + run_init(&run); + attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false, NULL); + run_close(&run); + } + if (rp_inserted) ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref); From 6a8d39ee34dc7fcdea395239b745bc3ea15336cf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 21 Oct 2024 11:57:38 -0700 Subject: [PATCH 173/174] 9p: fix slab cache name creation for real commit a360f311f57a36e96d88fa8086b749159714dcd2 upstream. This was attempted by using the dev_name in the slab cache name, but as Omar Sandoval pointed out, that can be an arbitrary string, eg something like "/dev/root". Which in turn trips verify_dirent_name(), which fails if a filename contains a slash. So just make it use a sequence counter, and make it an atomic_t to avoid any possible races or locking issues. Reported-and-tested-by: Omar Sandoval Link: https://lore.kernel.org/all/ZxafcO8KWMlXaeWE@telecaster.dhcp.thefacebook.com/ Fixes: 79efebae4afc ("9p: Avoid creating multiple slab caches with the same name") Acked-by: Vlastimil Babka Cc: Dominique Martinet Cc: Thorsten Leemhuis Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 18db0e23e2f1..e876d6fea2fc 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -967,6 +967,7 @@ error: struct p9_client *p9_client_create(const char *dev_name, char *options) { int err; + static atomic_t seqno = ATOMIC_INIT(0); struct p9_client *clnt; char *client_id; char *cache_name; @@ -1027,7 +1028,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto close_trans; - cache_name = kasprintf(GFP_KERNEL, "9p-fcall-cache-%s", dev_name); + cache_name = kasprintf(GFP_KERNEL, + "9p-fcall-cache-%u", atomic_inc_return(&seqno)); if (!cache_name) { err = -ENOMEM; goto close_trans; From b67dc5c9ade9dc354b790eb64aa6a665d0a54ecd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 17 Nov 2024 15:07:22 +0100 Subject: [PATCH 174/174] Linux 6.1.118 Link: https://lore.kernel.org/r/20241115063722.599985562@linuxfoundation.org Tested-by: Peter Schneider Tested-by: Jon Hunter Tested-by: SeongJae Park Tested-by: Salvatore Bonaccorso Tested-by: Florian Fainelli Tested-by: Mark Brown Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Shuah Khan Tested-by: Pavel Machek (CIP) Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c88229bb9626..ca304cece572 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 117 +SUBLEVEL = 118 EXTRAVERSION = NAME = Curry Ramen