From 65ae99b51e79ab83510dbfc22ff6cbd63628ec57 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 2 Aug 2024 14:55:55 +0100 Subject: [PATCH 01/71] clockevents/drivers/i8253: Fix stop sequence for timer 0 commit 531b2ca0a940ac9db03f246c8b77c4201de72b00 upstream. According to the data sheet, writing the MODE register should stop the counter (and thus the interrupts). This appears to work on real hardware, at least modern Intel and AMD systems. It should also work on Hyper-V. However, on some buggy virtual machines the mode change doesn't have any effect until the counter is subsequently loaded (or perhaps when the IRQ next fires). So, set MODE 0 and then load the counter, to ensure that those buggy VMs do the right thing and the interrupts stop. And then write MODE 0 *again* to stop the counter on compliant implementations too. Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero when it should only happen once, but the second MODE write stops that too. Userspace test program (mostly written by tglx): ===== #include #include #include #include #include static __always_inline void __out##bwl(type value, uint16_t port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(uint16_t port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, uint8_t) #define inb __inb #define outb __outb #define PIT_MODE 0x43 #define PIT_CH0 0x40 #define PIT_CH2 0x42 static int is8254; static void dump_pit(void) { if (is8254) { // Latch and output counter and status outb(0xC2, PIT_MODE); printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0)); } else { // Latch and output counter outb(0x0, PIT_MODE); printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0)); } } int main(int argc, char* argv[]) { int nr_counts = 2; if (argc > 1) nr_counts = atoi(argv[1]); if (argc > 2) is8254 = 1; if (ioperm(0x40, 4, 1) != 0) return 1; dump_pit(); printf("Set oneshot\n"); outb(0x38, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); printf("Set periodic\n"); outb(0x34, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set stop (%d counter writes)\n", nr_counts); outb(0x30, PIT_MODE); while (nr_counts--) outb(0xFF, PIT_CH0); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set MODE 0\n"); outb(0x30, PIT_MODE); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); return 0; } ===== Suggested-by: Sean Christopherson Co-developed-by: Li RongQing Signed-off-by: Li RongQing Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mshyperv.c | 11 ----------- drivers/clocksource/i8253.c | 36 +++++++++++++++++++++++----------- include/linux/i8253.h | 1 - 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 6090fe513d40..100fed2afbe7 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -461,16 +460,6 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; - /* - * Hyper-V VMs have a PIT emulation quirk such that zeroing the - * counter register during PIT shutdown restarts the PIT. So it - * continues to interrupt @18.2 HZ. Setting i8253_clear_counter - * to false tells pit_shutdown() not to zero the counter so that - * the PIT really is shutdown. Generation 2 VMs don't have a PIT, - * and setting this value has no effect. - */ - i8253_clear_counter_on_shutdown = false; - #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index cb215e6f2e83..39f7c2d736d1 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -20,13 +20,6 @@ DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -/* - * Handle PIT quirk in pit_shutdown() where zeroing the counter register - * restarts the PIT, negating the shutdown. On platforms with the quirk, - * platform specific code can set this to false. - */ -bool i8253_clear_counter_on_shutdown __ro_after_init = true; - #ifdef CONFIG_CLKSRC_I8253 /* * Since the PIT overflows every tick, its not very useful @@ -112,12 +105,33 @@ void clockevent_i8253_disable(void) { raw_spin_lock(&i8253_lock); + /* + * Writing the MODE register should stop the counter, according to + * the datasheet. This appears to work on real hardware (well, on + * modern Intel and AMD boxes; I didn't dig the Pegasos out of the + * shed). + * + * However, some virtual implementations differ, and the MODE change + * doesn't have any effect until either the counter is written (KVM + * in-kernel PIT) or the next interrupt (QEMU). And in those cases, + * it may not stop the *count*, only the interrupts. Although in + * the virt case, that probably doesn't matter, as the value of the + * counter will only be calculated on demand if the guest reads it; + * it's the interrupts which cause steal time. + * + * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps + * firing repeatedly if the counter is running. But it *does* do the + * right thing when the MODE register is written. + * + * So: write the MODE and then load the counter, which ensures that + * the IRQ is stopped on those buggy virt implementations. And then + * write the MODE again, which is the right way to stop it. + */ outb_p(0x30, PIT_MODE); + outb_p(0, PIT_CH0); + outb_p(0, PIT_CH0); - if (i8253_clear_counter_on_shutdown) { - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); - } + outb_p(0x30, PIT_MODE); raw_spin_unlock(&i8253_lock); } diff --git a/include/linux/i8253.h b/include/linux/i8253.h index bf169cfef7f1..56c280eb2d4f 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,7 +21,6 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; -extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); extern void clockevent_i8253_disable(void); From ba181019d1a8c2ebebce1c2486fe2e90198d389b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 11 Apr 2024 16:39:05 +0200 Subject: [PATCH 02/71] sched/isolation: Prevent boot crash when the boot CPU is nohz_full Documentation/timers/no_hz.rst states that the "nohz_full=" mask must not include the boot CPU, which is no longer true after: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full"). However after: aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work") the kernel will crash at boot time in this case; housekeeping_any_cpu() returns an invalid CPU number until smp_init() brings the first housekeeping CPU up. Change housekeeping_any_cpu() to check the result of cpumask_any_and() and return smp_processor_id() in this case. This is just the simple and backportable workaround which fixes the symptom, but smp_processor_id() at boot time should be safe at least for type == HK_TYPE_TIMER, this more or less matches the tick_do_timer_boot_cpu logic. There is no worry about cpu_down(); tick_nohz_cpu_down() will not allow to offline tick_do_timer_cpu (the 1st online housekeeping CPU). [ Apply only documentation changes as commit which causes boot crash when boot CPU is nohz_full is not backported to stable kernels - Krishanth ] Reported-by: Chris von Recklinghausen Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Phil Auld Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240411143905.GA19288@redhat.com Closes: https://lore.kernel.org/all/20240402105847.GA24832@redhat.com/ Signed-off-by: Krishanth Jagaduri [ strip out upstream commit and Fixes: so tools don't get confused that this commit actually does anything real - gregkh] Signed-off-by: Greg Kroah-Hartman --- Documentation/timers/no_hz.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index f8786be15183..7fe8ef9718d8 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain online to handle timekeeping tasks in order to ensure that system calls like gettimeofday() returns accurate values on adaptive-tick CPUs. (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running -user processes to observe slight drifts in clock rate.) Therefore, the -boot CPU is prohibited from entering adaptive-ticks mode. Specifying a -"nohz_full=" mask that includes the boot CPU will result in a boot-time -error message, and the boot CPU will be removed from the mask. Note that -this means that your system must have at least two CPUs in order for +user processes to observe slight drifts in clock rate.) Note that this +means that your system must have at least two CPUs in order for CONFIG_NO_HZ_FULL=y to do anything for you. Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. From 4c3712c15f78138f06ffe3a6fb6680aabb7e6624 Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Wed, 14 Aug 2024 14:10:32 +0200 Subject: [PATCH 03/71] hrtimer: Use and report correct timerslack values for realtime tasks commit ed4fb6d7ef68111bb539283561953e5c6e9a6e38 upstream. The timerslack_ns setting is used to specify how much the hardware timers should be delayed, to potentially dispatch multiple timers in a single interrupt. This is a performance optimization. Timers of realtime tasks (having a realtime scheduling policy) should not be delayed. This logic was inconsitently applied to the hrtimers, leading to delays of realtime tasks which used timed waits for events (e.g. condition variables). Due to the downstream override of the slack for rt tasks, the procfs reported incorrect (non-zero) timerslack_ns values. This is changed by setting the timer_slack_ns task attribute to 0 for all tasks with a rt policy. By that, downstream users do not need to specially handle rt tasks (w.r.t. the slack), and the procfs entry shows the correct value of "0". Setting non-zero slack values (either via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored, as stated in "man 2 PR_SET_TIMERSLACK": Timer slack is not applied to threads that are scheduled under a real-time scheduling policy (see sched_setscheduler(2)). The special handling of timerslack on rt tasks in downstream users is removed as well. Signed-off-by: Felix Moessbauer Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 9 +++++---- fs/select.c | 11 ++++------- kernel/sched/core.c | 8 ++++++++ kernel/sys.c | 2 ++ kernel/time/hrtimer.c | 18 +++--------------- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index ecc45389ea79..82e4a8805bae 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2633,10 +2633,11 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, } task_lock(p); - if (slack_ns == 0) - p->timer_slack_ns = p->default_timer_slack_ns; - else - p->timer_slack_ns = slack_ns; + if (task_is_realtime(p)) + slack_ns = 0; + else if (slack_ns == 0) + slack_ns = p->default_timer_slack_ns; + p->timer_slack_ns = slack_ns; task_unlock(p); out: diff --git a/fs/select.c b/fs/select.c index 3f730b8581f6..e66b6189845e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -77,19 +77,16 @@ u64 select_estimate_accuracy(struct timespec64 *tv) { u64 ret; struct timespec64 now; + u64 slack = current->timer_slack_ns; - /* - * Realtime tasks get a slack of 0 for obvious reasons. - */ - - if (rt_task(current)) + if (slack == 0) return 0; ktime_get_ts64(&now); now = timespec64_sub(*tv, now); ret = __estimate_accuracy(&now); - if (ret < current->timer_slack_ns) - return current->timer_slack_ns; + if (ret < slack) + return slack; return ret; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0a483fd9f5de..9be8a509b5f3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7380,6 +7380,14 @@ static void __setscheduler_params(struct task_struct *p, else if (fair_policy(policy)) p->static_prio = NICE_TO_PRIO(attr->sched_nice); + /* rt-policy tasks do not have a timerslack */ + if (task_is_realtime(p)) { + p->timer_slack_ns = 0; + } else if (p->timer_slack_ns == 0) { + /* when switching back to non-rt policy, restore timerslack */ + p->timer_slack_ns = p->default_timer_slack_ns; + } + /* * __sched_setscheduler() ensures attr->sched_priority == 0 when * !rt_policy. Always setting this ensures that things like diff --git a/kernel/sys.c b/kernel/sys.c index d06eda1387b6..06a9a87a8d3e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2477,6 +2477,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = current->timer_slack_ns; break; case PR_SET_TIMERSLACK: + if (task_is_realtime(current)) + break; if (arg2 <= 0) current->timer_slack_ns = current->default_timer_slack_ns; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8db65e2db14c..f6d799646dd9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2090,14 +2090,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; - u64 slack; - - slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) - slack = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); - hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); + hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) goto out; @@ -2278,7 +2273,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2305,13 +2300,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } - /* - * Override any slack passed by the user if under - * rt contraints. - */ - if (rt_task(current)) - delta = 0; - hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2331,7 +2319,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode * * Make the current task sleep until the given expiry time has From f9c572d02fcced59c68f0287680fef9a5e0f5c9a Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:25:25 +0400 Subject: [PATCH 04/71] fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super commit 91a4b1ee78cb100b19b70f077c247f211110348f upstream. Reported-by: syzbot+478c1bf0e6bf4a8f3a04@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov Signed-off-by: Miguel Garcia Roman Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/ntfs_fs.h | 2 ++ fs/ntfs3/super.c | 68 +++++++++++++++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 26dbe1b46fdd..cbbf1ccc38e3 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -42,9 +42,11 @@ enum utf16_endian; #define MINUS_ONE_T ((size_t)(-1)) /* Biggest MFT / smallest cluster */ #define MAXIMUM_BYTES_PER_MFT 4096 +#define MAXIMUM_SHIFT_BYTES_PER_MFT 12 #define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512) #define MAXIMUM_BYTES_PER_INDEX 4096 +#define MAXIMUM_SHIFT_BYTES_PER_INDEX 12 #define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512) /* NTFS specific error code when fixup failed. */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index eee54214f4a3..674a16c0c66b 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -680,7 +680,7 @@ static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot) * ntfs_init_from_boot - Init internal info from on-disk boot sector. */ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, - u64 dev_size) + u64 dev_size) { struct ntfs_sb_info *sbi = sb->s_fs_info; int err; @@ -705,12 +705,12 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, /* 0x55AA is not mandaroty. Thanks Maxim Suhanov*/ /*if (0x55 != boot->boot_magic[0] || 0xAA != boot->boot_magic[1]) - * goto out; + * goto out; */ boot_sector_size = (u32)boot->bytes_per_sector[1] << 8; if (boot->bytes_per_sector[0] || boot_sector_size < SECTOR_SIZE || - !is_power_of_2(boot_sector_size)) { + !is_power_of_2(boot_sector_size)) { goto out; } @@ -733,15 +733,49 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, /* Check MFT record size. */ if ((boot->record_size < 0 && - SECTOR_SIZE > (2U << (-boot->record_size))) || - (boot->record_size >= 0 && !is_power_of_2(boot->record_size))) { + SECTOR_SIZE > (2U << (-boot->record_size))) || + (boot->record_size >= 0 && !is_power_of_2(boot->record_size))) { + goto out; + } + + /* Calculate cluster size */ + sbi->cluster_size = boot_sector_size * sct_per_clst; + sbi->cluster_bits = blksize_bits(sbi->cluster_size); + + if (boot->record_size >= 0) { + record_size = (u32)boot->record_size << sbi->cluster_bits; + } else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) { + record_size = 1u << (-boot->record_size); + } else { + ntfs_err(sb, "%s: invalid record size %d.", "NTFS", + boot->record_size); + goto out; + } + + sbi->record_size = record_size; + sbi->record_bits = blksize_bits(record_size); + sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes + + if (record_size > MAXIMUM_BYTES_PER_MFT) { + ntfs_err(sb, "Unsupported bytes per MFT record %u.", + record_size); + goto out; + } + + if (boot->index_size >= 0) { + sbi->index_size = (u32)boot->index_size << sbi->cluster_bits; + } else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) { + sbi->index_size = 1u << (-boot->index_size); + } else { + ntfs_err(sb, "%s: invalid index size %d.", "NTFS", + boot->index_size); goto out; } /* Check index record size. */ if ((boot->index_size < 0 && - SECTOR_SIZE > (2U << (-boot->index_size))) || - (boot->index_size >= 0 && !is_power_of_2(boot->index_size))) { + SECTOR_SIZE > (2U << (-boot->index_size))) || + (boot->index_size >= 0 && !is_power_of_2(boot->index_size))) { goto out; } @@ -762,9 +796,6 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, dev_size += sector_size - 1; } - sbi->cluster_size = boot_sector_size * sct_per_clst; - sbi->cluster_bits = blksize_bits(sbi->cluster_size); - sbi->mft.lbo = mlcn << sbi->cluster_bits; sbi->mft.lbo2 = mlcn2 << sbi->cluster_bits; @@ -785,9 +816,9 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, sbi->cluster_mask = sbi->cluster_size - 1; sbi->cluster_mask_inv = ~(u64)sbi->cluster_mask; sbi->record_size = record_size = boot->record_size < 0 - ? 1 << (-boot->record_size) - : (u32)boot->record_size - << sbi->cluster_bits; + ? 1 << (-boot->record_size) + : (u32)boot->record_size + << sbi->cluster_bits; if (record_size > MAXIMUM_BYTES_PER_MFT || record_size < SECTOR_SIZE) goto out; @@ -801,8 +832,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, ALIGN(sizeof(enum ATTR_TYPE), 8); sbi->index_size = boot->index_size < 0 - ? 1u << (-boot->index_size) - : (u32)boot->index_size << sbi->cluster_bits; + ? 1u << (-boot->index_size) + : (u32)boot->index_size << sbi->cluster_bits; sbi->volume.ser_num = le64_to_cpu(boot->serial_num); @@ -871,13 +902,6 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, sb->s_maxbytes = 0xFFFFFFFFull << sbi->cluster_bits; #endif - /* - * Compute the MFT zone at two steps. - * It would be nice if we are able to allocate 1/8 of - * total clusters for MFT but not more then 512 MB. - */ - sbi->zone_max = min_t(CLST, 0x20000000 >> sbi->cluster_bits, clusters >> 3); - err = 0; out: From 01e8a8111b3ab840352ecb80a25d6ff0691ae0dc Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Feb 2025 15:52:52 -0800 Subject: [PATCH 05/71] fbdev: hyperv_fb: iounmap() the correct memory when removing a device [ Upstream commit 7241c886a71797cc51efc6fadec7076fcf6435c2 ] When a Hyper-V framebuffer device is removed, or the driver is unbound from a device, any allocated and/or mapped memory must be released. In particular, MMIO address space that was mapped to the framebuffer must be unmapped. Current code unmaps the wrong address, resulting in an error like: [ 4093.980597] iounmap: bad address 00000000c936c05c followed by a stack dump. Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") changed the kind of address stored in info->screen_base, and the iounmap() call in hvfb_putmem() was not updated accordingly. Fix this by updating hvfb_putmem() to unmap the correct address. Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250209235252.2987-1-mhklinux@outlook.com> Signed-off-by: Sasha Levin --- drivers/video/fbdev/hyperv_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index d3d643cf7506..41c496ff55cc 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1106,7 +1106,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) if (par->need_docopy) { vfree(par->dio_vp); - iounmap(info->screen_base); + iounmap(par->mmio_vp); vmbus_free_mmio(par->mem->start, screen_fb_size); } else { hvfb_release_phymem(hdev, info->fix.smem_start, From 5f7f8d9d46d7ba10b6a613e4a328d07fde680422 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 7 Feb 2025 21:02:41 +0100 Subject: [PATCH 06/71] pinctrl: bcm281xx: Fix incorrect regmap max_registers value [ Upstream commit 68283c1cb573143c0b7515e93206f3503616bc10 ] The max_registers value does not take into consideration the stride; currently, it's set to the number of the last pin, but this does not accurately represent the final register. Fix this by multiplying the current value by 4. Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver") Signed-off-by: Artur Weber Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c index fd52a83387ef..bba5496335ee 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c @@ -971,7 +971,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = BCM281XX_PIN_VC_CAM3_SDA, + .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, }; static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) From 7a91926c76c05ddfa577b71a086e641adf8b0581 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 17 Feb 2025 17:02:42 +0100 Subject: [PATCH 07/71] netfilter: nft_ct: Use __refcount_inc() for per-CPU nft_ct_pcpu_template. [ Upstream commit 5cfe5612ca9590db69b9be29dc83041dbf001108 ] nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its locking. The refcounter is read and if its value is set to one then the refcounter is incremented and variable is used - otherwise it is already in use and left untouched. Without per-CPU locking in local_bh_disable() on PREEMPT_RT the read-then-increment operation is not atomic and therefore racy. This can be avoided by using unconditionally __refcount_inc() which will increment counter and return the old value as an atomic operation. In case the returned counter is not one, the variable is in use and we need to decrement counter. Otherwise we can use it. Use __refcount_inc() instead of read and a conditional increment. Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_ct.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6157f8b4a3ce..3641043ca8cc 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -240,6 +240,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, enum ip_conntrack_info ctinfo; u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; + int oldcnt; ct = nf_ct_get(skb, &ctinfo); if (ct) /* already tracked */ @@ -260,10 +261,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(refcount_read(&ct->ct_general.use) == 1)) { - refcount_inc(&ct->ct_general.use); + __refcount_inc(&ct->ct_general.use, &oldcnt); + if (likely(oldcnt == 1)) { nf_ct_zone_add(ct, &zone); } else { + refcount_dec(&ct->ct_general.use); /* previous skb got queued to userspace, allocate temporary * one until percpu template can be reused. */ From fcbacc47d16306c87ad1b820b7a575f6e9eae58b Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Thu, 23 Jan 2025 09:15:39 +0100 Subject: [PATCH 08/71] ice: fix memory leak in aRFS after reset [ Upstream commit 23d97f18901ef5e4e264e3b1777fe65c760186b5 ] Fix aRFS (accelerated Receive Flow Steering) structures memory leak by adding a checker to verify if aRFS memory is already allocated while configuring VSI. aRFS objects are allocated in two cases: - as part of VSI initialization (at probe), and - as part of reset handling However, VSI reconfiguration executed during reset involves memory allocation one more time, without prior releasing already allocated resources. This led to the memory leak with the following signature: [root@os-delivery ~]# cat /sys/kernel/debug/kmemleak unreferenced object 0xff3c1ca7252e6000 (size 8192): comm "kworker/0:0", pid 8, jiffies 4296833052 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): [] __kmalloc_cache_noprof+0x275/0x340 [] ice_init_arfs+0x3a/0xe0 [ice] [] ice_vsi_cfg_def+0x607/0x850 [ice] [] ice_vsi_setup+0x5b/0x130 [ice] [] ice_init+0x1c1/0x460 [ice] [] ice_probe+0x2af/0x520 [ice] [] local_pci_probe+0x43/0xa0 [] work_for_cpu_fn+0x13/0x20 [] process_one_work+0x179/0x390 [] worker_thread+0x239/0x340 [] kthread+0xcc/0x100 [] ret_from_fork+0x2d/0x50 [] ret_from_fork_asm+0x1a/0x30 ... Fixes: 28bf26724fdb ("ice: Implement aRFS") Reviewed-by: Michal Swiatkowski Signed-off-by: Grzegorz Nitka Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index fba178e07600..79074f041e39 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -510,7 +510,7 @@ void ice_init_arfs(struct ice_vsi *vsi) struct hlist_head *arfs_fltr_list; unsigned int i; - if (!vsi || vsi->type != ICE_VSI_PF) + if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) return; arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), From 4fe956651221d8874d8ae4d942f73f3e2ad48d9a Mon Sep 17 00:00:00 2001 From: Nicklas Bo Jensen Date: Thu, 27 Feb 2025 13:32:34 +0000 Subject: [PATCH 09/71] netfilter: nf_conncount: garbage collection is not skipped when jiffies wrap around [ Upstream commit df08c94baafb001de6cf44bb7098bb557f36c335 ] nf_conncount is supposed to skip garbage collection if it has already run garbage collection in the same jiffy. Unfortunately, this is broken when jiffies wrap around which this patch fixes. The problem is that last_gc in the nf_conncount_list struct is an u32, but jiffies is an unsigned long which is 8 bytes on my systems. When those two are compared it only works until last_gc wraps around. See bug report: https://bugzilla.netfilter.org/show_bug.cgi?id=1778 for more details. Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Nicklas Bo Jensen Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conncount.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 5885810da412..71869ad46646 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; - if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + if ((u32)jiffies == list->last_gc) goto add_new_node; /* check the saved connections */ @@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net, bool ret = false; /* don't bother if we just did GC */ - if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + if ((u32)jiffies == READ_ONCE(list->last_gc)) return false; /* don't bother if other cpu is already doing GC */ From d02c9acd68950a444acda18d514e2b41f846cb7f Mon Sep 17 00:00:00 2001 From: Jun Yang Date: Wed, 5 Mar 2025 23:44:10 +0800 Subject: [PATCH 10/71] sched: address a potential NULL pointer dereference in the GRED scheduler. [ Upstream commit 115ef44a98220fddfab37a39a19370497cd718b9 ] If kzalloc in gred_init returns a NULL pointer, the code follows the error handling path, invoking gred_destroy. This, in turn, calls gred_offload, where memset could receive a NULL pointer as input, potentially leading to a kernel crash. When table->opt is NULL in gred_init(), gred_change_table_def() is not called yet, so it is not necessary to call ->ndo_setup_tc() in gred_offload(). Signed-off-by: Jun Yang Reviewed-by: Cong Wang Fixes: f25c0515c521 ("net: sched: gred: dynamically allocate tc_gred_qopt_offload") Link: https://patch.msgid.link/20250305154410.3505642-1-juny24602@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_gred.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 872d127c9db4..fa7a1b69c0f3 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch) for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); - gred_offload(sch, TC_GRED_DESTROY); + if (table->opt) + gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } From 0272d4af7f92997541d8bbf4c51918b93ded6ee2 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 6 Mar 2025 12:37:59 +0200 Subject: [PATCH 11/71] wifi: cfg80211: cancel wiphy_work before freeing wiphy [ Upstream commit 72d520476a2fab6f3489e8388ab524985d6c4b90 ] A wiphy_work can be queued from the moment the wiphy is allocated and initialized (i.e. wiphy_new_nm). When a wiphy_work is queued, the rdev::wiphy_work is getting queued. If wiphy_free is called before the rdev::wiphy_work had a chance to run, the wiphy memory will be freed, and then when it eventally gets to run it'll use invalid memory. Fix this by canceling the work before freeing the wiphy. Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Signed-off-by: Miri Korenblit Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20250306123626.efd1d19f6e07.I48229f96f4067ef73f5b87302335e2fd750136c9@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 2bed30621fa6..74904f88edfa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1146,6 +1146,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + WARN_ON(!list_empty(&rdev->wiphy_work_list)); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + cancel_work_sync(&rdev->wiphy_work); + rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); From 79d50ce6583572775c765e2c3bd1386f31b23117 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 28 Feb 2025 13:12:54 -0500 Subject: [PATCH 12/71] Bluetooth: hci_event: Fix enabling passive scanning [ Upstream commit 0bdd88971519cfa8a76d1a4dde182e74cfbd5d5c ] Passive scanning shall only be enabled when disconnecting LE links, otherwise it may start result in triggering scanning when e.g. an ISO link disconnects: > HCI Event: LE Meta Event (0x3e) plen 29 LE Connected Isochronous Stream Established (0x19) Status: Success (0x00) Connection Handle: 257 CIG Synchronization Delay: 0 us (0x000000) CIS Synchronization Delay: 0 us (0x000000) Central to Peripheral Latency: 10000 us (0x002710) Peripheral to Central Latency: 10000 us (0x002710) Central to Peripheral PHY: LE 2M (0x02) Peripheral to Central PHY: LE 2M (0x02) Number of Subevents: 1 Central to Peripheral Burst Number: 1 Peripheral to Central Burst Number: 1 Central to Peripheral Flush Timeout: 2 Peripheral to Central Flush Timeout: 2 Central to Peripheral MTU: 320 Peripheral to Central MTU: 160 ISO Interval: 10.00 msec (0x0008) ... > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 257 Reason: Remote User Terminated Connection (0x13) < HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6 Extended scan: Enabled (0x01) Filter duplicates: Enabled (0x01) Duration: 0 msec (0x0000) Period: 0.00 sec (0x0000) Fixes: 9fcb18ef3acb ("Bluetooth: Introduce LE auto connect options") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b6fe5e15981f..6cfd61628cd7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3477,23 +3477,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_update_scan(hdev); } - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - switch (params->auto_connect) { - case HCI_AUTO_CONN_LINK_LOSS: - if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + /* Re-enable passive scanning if disconnected device is marked + * as auto-connectable. + */ + if (conn->type == LE_LINK) { + params = hci_conn_params_lookup(hdev, &conn->dst, + conn->dst_type); + if (params) { + switch (params->auto_connect) { + case HCI_AUTO_CONN_LINK_LOSS: + if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + break; + fallthrough; + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + hci_pend_le_list_del_init(params); + hci_pend_le_list_add(params, + &hdev->pend_le_conns); + hci_update_passive_scan(hdev); break; - fallthrough; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - hci_pend_le_list_del_init(params); - hci_pend_le_list_add(params, &hdev->pend_le_conns); - hci_update_passive_scan(hdev); - break; - - default: - break; + default: + break; + } } } From 55a173e49f94f154dcb3438ec903bc5f5e755cd2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Mar 2025 10:06:10 -0500 Subject: [PATCH 13/71] Revert "Bluetooth: hci_core: Fix sleeping function called from invalid context" [ Upstream commit ab6ab707a4d060a51c45fc13e3b2228d5f7c0b87 ] This reverts commit 4d94f05558271654670d18c26c912da0c1c15549 which has problems (see [1]) and is no longer needed since 581dd2dc168f ("Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating") has reworked the code where the original bug has been found. [1] Link: https://lore.kernel.org/linux-bluetooth/877c55ci1r.wl-tiwai@suse.de/T/#t Fixes: 4d94f0555827 ("Bluetooth: hci_core: Fix sleeping function called from invalid context") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 108 +++++++++++-------------------- net/bluetooth/hci_core.c | 10 ++- net/bluetooth/iso.c | 6 -- net/bluetooth/l2cap_core.c | 12 ++-- net/bluetooth/rfcomm/core.c | 6 -- net/bluetooth/sco.c | 12 ++-- 6 files changed, 57 insertions(+), 97 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b37e95554271..d26b57e87f7f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -815,6 +815,7 @@ struct hci_conn_params { extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; +extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) @@ -1768,47 +1769,24 @@ struct hci_cb { char *name; - bool (*match) (struct hci_conn *conn); void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, - __u8 encrypt); + __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; -static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) -{ - struct hci_cb *cb, *cpy; - - rcu_read_lock(); - list_for_each_entry_rcu(cb, &hci_cb_list, list) { - if (cb->match && cb->match(conn)) { - cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); - if (!cpy) - break; - - *cpy = *cb; - INIT_LIST_HEAD(&cpy->list); - list_add_rcu(&cpy->list, list); - } - } - rcu_read_unlock(); -} - static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; + struct hci_cb *cb; - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); @@ -1816,43 +1794,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { - struct list_head list; - struct hci_cb *cb, *tmp; + struct hci_cb *cb; - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } -static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, - __u8 encrypt) -{ - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { - if (cb->security_cfm) - cb->security_cfm(conn, status, encrypt); - kfree(cb); - } - - if (conn->security_cfm_cb) - conn->security_cfm_cb(conn, status); -} - static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) @@ -1860,11 +1817,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { @@ -1891,38 +1857,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) conn->sec_level = conn->pending_sec_level; } - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; + struct hci_cb *cb; - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { - struct list_head list; - struct hci_cb *cb, *tmp; + struct hci_cb *cb; - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 496dac042b9c..3cd7c212375f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -58,6 +58,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -2977,7 +2978,9 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_add_tail_rcu(&cb->list, &hci_cb_list); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -2987,8 +2990,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_del_rcu(&cb->list); - synchronize_rcu(); + mutex_lock(&hci_cb_list_lock); + list_del(&cb->list); + mutex_unlock(&hci_cb_list_lock); return 0; } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index f62df9097f5e..437cbeaa9619 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1579,11 +1579,6 @@ done: return lm; } -static bool iso_match(struct hci_conn *hcon) -{ - return hcon->type == ISO_LINK || hcon->type == LE_LINK; -} - static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { if (hcon->type != ISO_LINK) { @@ -1753,7 +1748,6 @@ drop: static struct hci_cb iso_cb = { .name = "ISO", - .match = iso_match, .connect_cfm = iso_connect_cfm, .disconn_cfm = iso_disconn_cfm, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 36d6122f2e12..21a79ef7092d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -8278,11 +8278,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -static bool l2cap_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK || hcon->type == LE_LINK; -} - static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; @@ -8290,6 +8285,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -8354,6 +8352,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon) static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -8641,7 +8642,6 @@ drop: static struct hci_cb l2cap_cb = { .name = "L2CAP", - .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1686fa60e278..4f54c7df3a94 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2130,11 +2130,6 @@ static int rfcomm_run(void *unused) return 0; } -static bool rfcomm_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK; -} - static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; @@ -2181,7 +2176,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", - .match = rfcomm_match, .security_cfm = rfcomm_security_cfm }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 127479bf475b..fe8728041ad0 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1367,13 +1367,11 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -static bool sco_match(struct hci_conn *hcon) -{ - return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; -} - static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); if (!status) { @@ -1388,6 +1386,9 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1413,7 +1414,6 @@ drop: static struct hci_cb sco_cb = { .name = "SCO", - .match = sco_match, .connect_cfm = sco_connect_cfm, .disconn_cfm = sco_disconn_cfm, }; From 95b530407348f42134eb6c1591f0d12afca890df Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Thu, 6 Mar 2025 12:23:05 -0500 Subject: [PATCH 14/71] net: dsa: mv88e6xxx: Verify after ATU Load ops [ Upstream commit dc5340c3133a3ebe54853fd299116149e528cfaa ] ATU Load operations could fail silently if there's not enough space on the device to hold the new entry. When this happens, the symptom depends on the unknown flood settings. If unknown multicast flood is disabled, the multicast packets are dropped when the ATU table is full. If unknown multicast flood is enabled, the multicast packets will be flooded to all ports. Either way, IGMP snooping is broken when the ATU Load operation fails silently. Do a Read-After-Write verification after each fdb/mdb add operation to make sure that the operation was really successful, and return -ENOSPC otherwise. Fixes: defb05b9b9b4 ("net: dsa: mv88e6xxx: Add support for fdb_add, fdb_del, and fdb_getnext") Signed-off-by: Joseph Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250306172306.3859214-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 59 ++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d94b46316a11..af0565c3a364 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2107,13 +2107,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, return err; } -static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) +static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid, + u16 *fid, struct mv88e6xxx_atu_entry *entry) { - struct mv88e6xxx_atu_entry entry; struct mv88e6xxx_vtu_entry vlan; - u16 fid; int err; /* Ports have two private address databases: one for when the port is @@ -2124,7 +2122,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, * VLAN ID into the port's database used for VLAN-unaware bridging. */ if (vid == 0) { - fid = MV88E6XXX_FID_BRIDGED; + *fid = MV88E6XXX_FID_BRIDGED; } else { err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -2134,14 +2132,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!vlan.valid) return -EOPNOTSUPP; - fid = vlan.fid; + *fid = vlan.fid; } - entry.state = 0; - ether_addr_copy(entry.mac, addr); - eth_addr_dec(entry.mac); + entry->state = 0; + ether_addr_copy(entry->mac, addr); + eth_addr_dec(entry->mac); - err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry); + return mv88e6xxx_g1_atu_getnext(chip, *fid, entry); +} + +static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip, + const unsigned char *addr, u16 vid) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; + + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); + if (err) + return false; + + return entry.state && ether_addr_equal(entry.mac, addr); +} + +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) +{ + struct mv88e6xxx_atu_entry entry; + u16 fid; + int err; + + err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); if (err) return err; @@ -2739,6 +2762,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, addr, vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; @@ -6502,6 +6532,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); + if (err) + goto out; + + if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid)) + err = -ENOSPC; + +out: mv88e6xxx_reg_unlock(chip); return err; From c49e91520d6fb280af0e755d3e3005e7c251b510 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 6 Mar 2025 10:33:20 +0800 Subject: [PATCH 15/71] net: mctp i2c: Copy headers if cloned [ Upstream commit df8ce77ba8b7c012a3edd1ca7368b46831341466 ] Use skb_cow_head() prior to modifying the TX SKB. This is necessary when the SKB has been cloned, to avoid modifying other shared clones. Signed-off-by: Matt Johnston Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Link: https://patch.msgid.link/20250306-matt-mctp-i2c-cow-v1-1-293827212681@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/mctp/mctp-i2c.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 7635a8b3c35c..17619d011689 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -543,6 +543,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, struct mctp_i2c_hdr *hdr; struct mctp_hdr *mhdr; u8 lldst, llsrc; + int rc; if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; @@ -553,6 +554,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); + rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr)); + if (rc) + return rc; + skb_push(skb, sizeof(struct mctp_i2c_hdr)); skb_reset_mac_header(skb); hdr = (void *)skb_mac_header(skb); From 486033f5777e6a766c138bc3248e3d0cc6bc12a4 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 6 Mar 2025 05:16:18 -0800 Subject: [PATCH 16/71] netpoll: hold rcu read lock in __netpoll_send_skb() [ Upstream commit 505ead7ab77f289f12d8a68ac83da068e4d4408b ] The function __netpoll_send_skb() is being invoked without holding the RCU read lock. This oversight triggers a warning message when CONFIG_PROVE_RCU_LIST is enabled: net/core/netpoll.c:330 suspicious rcu_dereference_check() usage! netpoll_send_skb netpoll_send_udp write_ext_msg console_flush_all console_unlock vprintk_emit To prevent npinfo from disappearing unexpectedly, ensure that __netpoll_send_skb() is protected with the RCU read lock. Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/netpoll.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 681eeb2b7399..657abbb7d0d7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -326,6 +326,7 @@ static int netpoll_owner_active(struct net_device *dev) static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; + netdev_tx_t ret = NET_XMIT_DROP; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ @@ -334,11 +335,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) lockdep_assert_irqs_disabled(); dev = np->dev; + rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); - return NET_XMIT_DROP; + goto out; } /* don't get messages out of order, and no recursion */ @@ -377,7 +379,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - return NETDEV_TX_OK; + ret = NETDEV_TX_OK; +out: + rcu_read_unlock(); + return ret; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) From c40cd24bfb9bfbb315c118ca14ebe6cf52e2dd1e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 10 Feb 2025 11:34:41 -0800 Subject: [PATCH 17/71] drm/hyperv: Fix address space leak when Hyper-V DRM device is removed [ Upstream commit aed709355fd05ef747e1af24a1d5d78cd7feb81e ] When a Hyper-V DRM device is probed, the driver allocates MMIO space for the vram, and maps it cacheable. If the device removed, or in the error path for device probing, the MMIO space is released but no unmap is done. Consequently the kernel address space for the mapping is leaked. Fix this by adding iounmap() calls in the device removal path, and in the error path during device probing. Fixes: f1f63cbb705d ("drm/hyperv: Fix an error handling path in hyperv_vmbus_probe()") Fixes: a0ab5abced55 ("drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size") Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Tested-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250210193441.2414-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250210193441.2414-1-mhklinux@outlook.com> Signed-off-by: Sasha Levin --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index 68050409dd26..499c7c8916df 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -157,6 +157,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, return 0; err_free_mmio: + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); err_vmbus_close: vmbus_close(hdev->channel); @@ -175,6 +176,7 @@ static int hyperv_vmbus_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); + iounmap(hv->vram); vmbus_free_mmio(hv->mem->start, hv->fb_size); return 0; From 1c954950f848569f35e8a11e8c04422a83a95744 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 9 Mar 2025 20:52:08 -0700 Subject: [PATCH 18/71] Drivers: hv: vmbus: Don't release fb_mmio resource in vmbus_free_mmio() [ Upstream commit 73fe9073c0cc28056cb9de0c8a516dac070f1d1f ] The VMBus driver manages the MMIO space it owns via the hyperv_mmio resource tree. Because the synthetic video framebuffer portion of the MMIO space is initially setup by the Hyper-V host for each guest, the VMBus driver does an early reserve of that portion of MMIO space in the hyperv_mmio resource tree. It saves a pointer to that resource in fb_mmio. When a VMBus driver requests MMIO space and passes "true" for the "fb_overlap_ok" argument, the reserved framebuffer space is used if possible. In that case it's not necessary to do another request against the "shadow" hyperv_mmio resource tree because that resource was already requested in the early reserve steps. However, the vmbus_free_mmio() function currently does no special handling for the fb_mmio resource. When a framebuffer device is removed, or the driver is unbound, the current code for vmbus_free_mmio() releases the reserved resource, leaving fb_mmio pointing to memory that has been freed. If the same or another driver is subsequently bound to the device, vmbus_allocate_mmio() checks against fb_mmio, and potentially gets garbage. Furthermore a second unbind operation produces this "nonexistent resource" error because of the unbalanced behavior between vmbus_allocate_mmio() and vmbus_free_mmio(): [ 55.499643] resource: Trying to free nonexistent resource <0x00000000f0000000-0x00000000f07fffff> Fix this by adding logic to vmbus_free_mmio() to recognize when MMIO space in the fb_mmio reserved area would be released, and don't release it. This filtering ensures the fb_mmio resource always exists, and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio(). Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree") Signed-off-by: Michael Kelley Tested-by: Saurabh Sengar Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250310035208.275764-1-mhklinux@outlook.com> Signed-off-by: Sasha Levin --- drivers/hv/vmbus_drv.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a2191bc5c153..dfbfdbf9cbd7 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2409,12 +2409,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) struct resource *iter; mutex_lock(&hyperv_mmio_lock); + + /* + * If all bytes of the MMIO range to be released are within the + * special case fb_mmio shadow region, skip releasing the shadow + * region since no corresponding __request_region() was done + * in vmbus_allocate_mmio(). + */ + if (fb_mmio && start >= fb_mmio->start && + (start + size - 1 <= fb_mmio->end)) + goto skip_shadow_release; + for (iter = hyperv_mmio; iter; iter = iter->sibling) { if ((iter->start >= start + size) || (iter->end <= start)) continue; __release_region(iter, start, size); } + +skip_shadow_release: release_mem_region(start, size); mutex_unlock(&hyperv_mmio_lock); From 1598307c914ba3d2642a2b03d1ff11efbdb7c6c2 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Fri, 7 Mar 2025 10:18:20 +0800 Subject: [PATCH 19/71] net/mlx5: handle errors in mlx5_chains_create_table() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eab0396353be1c778eba1c0b5180176f04dd21ce ] In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() and mlx5_get_flow_namespace() must be checked to prevent NULL pointer dereferences. If either function fails, the function should log error message with mlx5_core_warn() and return error pointer. Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") Signed-off-by: Wentao Liang Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index df58cba37930..64c1071bece8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ns = mlx5_get_flow_namespace(chains->dev, chains->ns); } + if (!ns) { + mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); + return ERR_PTR(-EOPNOTSUPP); + } + ft_attr.autogroup.num_reserved_entries = 2; ft_attr.autogroup.max_num_groups = chains->group_num; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); From e8e3e03d69f2420eaa578199a65d281c58867105 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 9 Mar 2025 13:42:15 +0000 Subject: [PATCH 20/71] eth: bnxt: do not update checksum in bnxt_xdp_build_skb() [ Upstream commit c03e7d05aa0e2f7e9a9ce5ad8a12471a53f941dc ] The bnxt_rx_pkt() updates ip_summed value at the end if checksum offload is enabled. When the XDP-MB program is attached and it returns XDP_PASS, the bnxt_xdp_build_skb() is called to update skb_shared_info. The main purpose of bnxt_xdp_build_skb() is to update skb_shared_info, but it updates ip_summed value too if checksum offload is enabled. This is actually duplicate work. When the bnxt_rx_pkt() updates ip_summed value, it checks if ip_summed is CHECKSUM_NONE or not. It means that ip_summed should be CHECKSUM_NONE at this moment. But ip_summed may already be updated to CHECKSUM_UNNECESSARY in the XDP-MB-PASS path. So the by skb_checksum_none_assert() WARNS about it. This is duplicate work and updating ip_summed in the bnxt_xdp_build_skb() is not needed. Splat looks like: WARNING: CPU: 3 PID: 5782 at ./include/linux/skbuff.h:5155 bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] Modules linked in: bnxt_re bnxt_en rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs veth xt_nat xt_tcpudp xt_conntrack nft_chain_nat xt_MASQUERADE nf_] CPU: 3 UID: 0 PID: 5782 Comm: socat Tainted: G W 6.14.0-rc4+ #27 Tainted: [W]=WARN Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 RIP: 0010:bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] Code: 54 24 0c 4c 89 f1 4c 89 ff c1 ea 1f ff d3 0f 1f 00 49 89 c6 48 85 c0 0f 84 4c e5 ff ff 48 89 c7 e8 ca 3d a0 c8 e9 8f f4 ff ff <0f> 0b f RSP: 0018:ffff88881ba09928 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000c7590303 RCX: 0000000000000000 RDX: 1ffff1104e7d1610 RSI: 0000000000000001 RDI: ffff8881c91300b8 RBP: ffff88881ba09b28 R08: ffff888273e8b0d0 R09: ffff888273e8b070 R10: ffff888273e8b010 R11: ffff888278b0f000 R12: ffff888273e8b080 R13: ffff8881c9130e00 R14: ffff8881505d3800 R15: ffff888273e8b000 FS: 00007f5a2e7be080(0000) GS:ffff88881ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff2e708ff8 CR3: 000000013e3b0000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? __warn+0xcd/0x2f0 ? bnxt_rx_pkt+0x479b/0x7610 ? report_bug+0x326/0x3c0 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x14/0x50 ? asm_exc_invalid_op+0x16/0x20 ? bnxt_rx_pkt+0x479b/0x7610 ? bnxt_rx_pkt+0x3e41/0x7610 ? __pfx_bnxt_rx_pkt+0x10/0x10 ? napi_complete_done+0x2cf/0x7d0 __bnxt_poll_work+0x4e8/0x1220 ? __pfx___bnxt_poll_work+0x10/0x10 ? __pfx_mark_lock.part.0+0x10/0x10 bnxt_poll_p5+0x36a/0xfa0 ? __pfx_bnxt_poll_p5+0x10/0x10 __napi_poll.constprop.0+0xa0/0x440 net_rx_action+0x899/0xd00 ... Following ping.py patch adds xdp-mb-pass case. so ping.py is going to be able to reproduce this issue. Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Signed-off-by: Taehee Yoo Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250309134219.91670-5-ap420073@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++--------- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +-- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7d9677d0f730..393a983f6d69 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1994,7 +1994,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!skb) goto oom_next_rx; } else { - skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); + skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, + rxr->page_pool, &xdp); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index aa56db138d6b..d9a7b85343a4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -462,20 +462,13 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) struct sk_buff * bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, - struct page_pool *pool, struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1) + struct page_pool *pool, struct xdp_buff *xdp) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); if (!skb) return NULL; - skb_checksum_none_assert(skb); - if (RX_CMP_L4_CS_OK(rxcmp1)) { - if (bp->dev->features & NETIF_F_RXCSUM) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = RX_CMP_ENCAP(rxcmp1); - } - } + xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, BNXT_RX_PAGE_SIZE * sinfo->nr_frags, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index ea430d6961df..ae8159c5f56c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, struct page_pool *pool, - struct xdp_buff *xdp, - struct rx_cmp_ext *rxcmp1); + struct xdp_buff *xdp); #endif From af757f5ee3f754c5dceefb05c12ff37cb46fc682 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 5 Mar 2025 14:15:09 +0200 Subject: [PATCH 21/71] net: switchdev: Convert blocking notification chain to a raw one [ Upstream commit 62531a1effa87bdab12d5104015af72e60d926ff ] A blocking notification chain uses a read-write semaphore to protect the integrity of the chain. The semaphore is acquired for writing when adding / removing notifiers to / from the chain and acquired for reading when traversing the chain and informing notifiers about an event. In case of the blocking switchdev notification chain, recursive notifications are possible which leads to the semaphore being acquired twice for reading and to lockdep warnings being generated [1]. Specifically, this can happen when the bridge driver processes a SWITCHDEV_BRPORT_UNOFFLOADED event which causes it to emit notifications about deferred events when calling switchdev_deferred_process(). Fix this by converting the notification chain to a raw notification chain in a similar fashion to the netdev notification chain. Protect the chain using the RTNL mutex by acquiring it when modifying the chain. Events are always informed under the RTNL mutex, but add an assertion in call_switchdev_blocking_notifiers() to make sure this is not violated in the future. Maintain the "blocking" prefix as events are always emitted from process context and listeners are allowed to block. [1]: WARNING: possible recursive locking detected 6.14.0-rc4-custom-g079270089484 #1 Not tainted -------------------------------------------- ip/52731 is trying to acquire lock: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 but task is already holding lock: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock((switchdev_blocking_notif_chain).rwsem); lock((switchdev_blocking_notif_chain).rwsem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by ip/52731: #0: ffffffff84f795b0 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x727/0x1dc0 #1: ffffffff8731f628 (&net->rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x790/0x1dc0 #2: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 stack backtrace: ... ? __pfx_down_read+0x10/0x10 ? __pfx_mark_lock+0x10/0x10 ? __pfx_switchdev_port_attr_set_deferred+0x10/0x10 blocking_notifier_call_chain+0x58/0xa0 switchdev_port_attr_notify.constprop.0+0xb3/0x1b0 ? __pfx_switchdev_port_attr_notify.constprop.0+0x10/0x10 ? mark_held_locks+0x94/0xe0 ? switchdev_deferred_process+0x11a/0x340 switchdev_port_attr_set_deferred+0x27/0xd0 switchdev_deferred_process+0x164/0x340 br_switchdev_port_unoffload+0xc8/0x100 [bridge] br_switchdev_blocking_event+0x29f/0x580 [bridge] notifier_call_chain+0xa2/0x440 blocking_notifier_call_chain+0x6e/0xa0 switchdev_bridge_port_unoffload+0xde/0x1a0 ... Fixes: f7a70d650b0b6 ("net: bridge: switchdev: Ensure deferred event delivery on unoffload") Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250305121509.631207-1-amcohen@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/switchdev/switchdev.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 2e14d4c37e2d..9aec24490e8f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -381,7 +381,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev, EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); -static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); +static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain); /** * register_switchdev_notifier - Register notifier @@ -427,17 +427,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers); int register_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; - return blocking_notifier_chain_register(chain, nb); + rtnl_lock(); + err = raw_notifier_chain_register(chain, nb); + rtnl_unlock(); + + return err; } EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier); int unregister_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; - return blocking_notifier_chain_unregister(chain, nb); + rtnl_lock(); + err = raw_notifier_chain_unregister(chain, nb); + rtnl_unlock(); + + return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier); @@ -445,10 +455,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info, struct netlink_ext_ack *extack) { + ASSERT_RTNL(); info->dev = dev; info->extack = extack; - return blocking_notifier_call_chain(&switchdev_blocking_notif_chain, - val, info); + return raw_notifier_call_chain(&switchdev_blocking_notif_chain, + val, info); } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); From 418119dd3feef96f65d6c2d4b727d9ce8052870d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 6 Mar 2025 02:39:22 +0000 Subject: [PATCH 22/71] bonding: fix incorrect MAC address setting to receive NS messages [ Upstream commit 0c5e145a350de3b38cd5ae77a401b12c46fb7c1d ] When validation on the backup slave is enabled, we need to validate the Neighbor Solicitation (NS) messages received on the backup slave. To receive these messages, the correct destination MAC address must be added to the slave. However, the target in bonding is a unicast address, which we cannot use directly. Instead, we should first convert it to a Solicited-Node Multicast Address and then derive the corresponding MAC address. Fix the incorrect MAC address setting on both slave_set_ns_maddr() and slave_set_ns_maddrs(). Since the two function names are similar. Add some description for the functions. Also only use one mac_addr variable in slave_set_ns_maddr() to save some code and logic. Fixes: 8eb36164d1a6 ("bonding: add ns target multicast address to slave device") Acked-by: Jay Vosburgh Reviewed-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250306023923.38777-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_options.c | 55 +++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c8536dc7d860..21ca95cdef42 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1238,10 +1238,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla slave->dev->flags & IFF_MULTICAST; } +/** + * slave_set_ns_maddrs - add/del all NS mac addresses for slave + * @bond: bond device + * @slave: slave device + * @add: add or remove all the NS mac addresses + * + * This function tries to add or delete all the NS mac addresses on the slave + * + * Note, the IPv6 NS target address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. + * + * This function is called when + * * arp_validate changes + * * enslaving, releasing new slaves + */ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) { struct in6_addr *targets = bond->params.ns_targets; char slot_maddr[MAX_ADDR_LEN]; + struct in6_addr mcaddr; int i; if (!slave_can_set_ns_maddr(bond, slave)) @@ -1251,7 +1269,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool if (ipv6_addr_any(&targets[i])) break; - if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) { + addrconf_addr_solict_mult(&targets[i], &mcaddr); + if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) { if (add) dev_mc_add(slave->dev, slot_maddr); else @@ -1274,23 +1293,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) slave_set_ns_maddrs(bond, slave, false); } +/** + * slave_set_ns_maddr - set new NS mac address for slave + * @bond: bond device + * @slave: slave device + * @target: the new IPv6 target + * @slot: the old IPv6 target in the slot + * + * This function tries to replace the old mac address to new one on the slave. + * + * Note, the target/slot IPv6 address is the unicast address in Neighbor + * Solicitation (NS) message. The dest address of NS message should be + * solicited-node multicast address of the target. The dest mac of NS message + * is converted from the solicited-node multicast address. + * + * This function is called when + * * An IPv6 NS target is added or removed. + */ static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave, struct in6_addr *target, struct in6_addr *slot) { - char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN]; + char mac_addr[MAX_ADDR_LEN]; + struct in6_addr mcast_addr; if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave)) return; - /* remove the previous maddr from slave */ + /* remove the previous mac addr from slave */ + addrconf_addr_solict_mult(slot, &mcast_addr); if (!ipv6_addr_any(slot) && - !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) - dev_mc_del(slave->dev, slot_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_del(slave->dev, mac_addr); - /* add new maddr on slave if target is set */ + /* add new mac addr on slave if target is set */ + addrconf_addr_solict_mult(target, &mcast_addr); if (!ipv6_addr_any(target) && - !ndisc_mc_map(target, target_maddr, slave->dev, 0)) - dev_mc_add(slave->dev, target_maddr); + !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) + dev_mc_add(slave->dev, mac_addr); } static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, From a62a25c6ad58fae997f48a0749afeda1c252ae51 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sun, 9 Mar 2025 17:07:38 +0900 Subject: [PATCH 23/71] netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple in insert_tree() [ Upstream commit d653bfeb07ebb3499c403404c21ac58a16531607 ] Since commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race"), `cpu` and `jiffies32` were introduced to the struct nf_conncount_tuple. The commit made nf_conncount_add() initialize `conn->cpu` and `conn->jiffies32` when allocating the struct. In contrast, count_tree() was not changed to initialize them. By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and traversal"), count_tree() was split and the relevant allocation code now resides in insert_tree(). Initialize `conn->cpu` and `conn->jiffies32` in insert_tree(). BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline] BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 find_or_evict net/netfilter/nf_conncount.c:117 [inline] __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 count_tree net/netfilter/nf_conncount.c:438 [inline] nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521 connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 __nft_match_eval net/netfilter/nft_compat.c:403 [inline] nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 NF_HOOK_LIST include/linux/netfilter.h:350 [inline] ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 __netif_receive_skb_list net/core/dev.c:6035 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 xdp_recv_frames net/bpf/test_run.c:280 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4121 [inline] slab_alloc_node mm/slub.c:4164 [inline] kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171 insert_tree net/netfilter/nf_conncount.c:372 [inline] count_tree net/netfilter/nf_conncount.c:450 [inline] nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521 connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 __nft_match_eval net/netfilter/nft_compat.c:403 [inline] nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 NF_HOOK_LIST include/linux/netfilter.h:350 [inline] ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 __netif_receive_skb_list net/core/dev.c:6035 [inline] netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 xdp_recv_frames net/bpf/test_run.c:280 [inline] xdp_test_run_batch net/bpf/test_run.c:361 [inline] bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7 Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race") Signed-off-by: Kohei Enju Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conncount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 71869ad46646..6156c0751056 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -377,6 +377,8 @@ restart: conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); From 917e52043097dc7ed930b8581d2a51d74506ce24 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 10:45:53 +0300 Subject: [PATCH 24/71] ipvs: prevent integer overflow in do_ip_vs_get_ctl() [ Upstream commit 80b78c39eb86e6b55f56363b709eb817527da5aa ] The get->num_services variable is an unsigned int which is controlled by the user. The struct_size() function ensures that the size calculation does not overflow an unsigned long, however, we are saving the result to an int so the calculation can overflow. Both "len" and "get->num_services" come from the user. This check is just a sanity check to help the user and ensure they are using the API correctly. An integer overflow here is not a big deal. This has no security impact. Save the result from struct_size() type size_t to fix this integer overflow bug. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Dan Carpenter Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 18e37b32a5d6..6cc50f05c46c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2854,12 +2854,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_SERVICES: { struct ip_vs_get_services *get; - int size; + size_t size; get = (struct ip_vs_get_services *)arg; size = struct_size(get, entrytable, get->num_services); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } @@ -2895,12 +2895,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_DESTS: { struct ip_vs_get_dests *get; - int size; + size_t size; get = (struct ip_vs_get_dests *)arg; size = struct_size(get, entrytable, get->num_dests); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } From e5ee00607bbfc97ef1526ea95b6b2458ac9e7cb7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Mar 2025 15:23:54 -0800 Subject: [PATCH 25/71] net_sched: Prevent creation of classes with TC_H_ROOT [ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ] The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination condition when traversing up the qdisc tree to update parent backlog counters. However, if a class is created with classid TC_H_ROOT, the traversal terminates prematurely at this class instead of reaching the actual root qdisc, causing parent statistics to be incorrectly maintained. In case of DRR, this could lead to a crash as reported by Mingi Cho. Prevent the creation of any Qdisc class with classid TC_H_ROOT (0xFFFFFFFF) across all qdisc types, as suggested by Jamal. Reported-by: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cb379849c51a..c395e7a98232 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2200,6 +2200,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, return -EOPNOTSUPP; } + /* Prevent creation of traffic classes with classid TC_H_ROOT */ + if (clid == TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); + return -EINVAL; + } + new_cl = cl; err = -EOPNOTSUPP; if (cops->change) From 42d5b131da14baa3d8662fb935ae6204caab2430 Mon Sep 17 00:00:00 2001 From: Alexey Kashavkin Date: Sun, 2 Mar 2025 00:14:36 +0300 Subject: [PATCH 26/71] netfilter: nft_exthdr: fix offset with ipv4_find_option() [ Upstream commit 6edd78af9506bb182518da7f6feebd75655d9a0e ] There is an incorrect calculation in the offset variable which causes the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the start variable is redundant. In the __ip_options_compile() function the correct offset is specified when finding the function. There is no need to add the size of the iphdr structure to the offset. Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") Signed-off-by: Alexey Kashavkin Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_exthdr.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index de588f7b69c4..60d18bd60d82 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -86,7 +86,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; - unsigned int start; bool found = false; __be32 info; int optlen; @@ -94,7 +93,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; - start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) @@ -104,7 +102,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, /* Copy the options since __ip_options_compile() modifies * the options. */ - if (skb_copy_bits(skb, start, opt->__data, optlen)) + if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; @@ -119,18 +117,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, found = target == IPOPT_SSRR ? opt->is_strictroute : !opt->is_strictroute; if (found) - *offset = opt->srr + start; + *offset = opt->srr; break; case IPOPT_RR: if (!opt->rr) break; - *offset = opt->rr + start; + *offset = opt->rr; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; - *offset = opt->router_alert + start; + *offset = opt->router_alert; found = true; break; default: From b309e7542614b2cb5ee692791e2f398b1c33f4cd Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 7 Mar 2025 20:28:53 +0100 Subject: [PATCH 27/71] gre: Fix IPv6 link-local address generation. [ Upstream commit 183185a18ff96751db52a46ccf93fff3a1f42815 ] Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/559c32ce5c9976b269e6337ac9abb6a96abe5096.1741375285.git.gnault@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f52527c86e71..69fb4d7c9c98 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3162,16 +3162,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3481,7 +3478,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From 2532adbfe917c0e71dba2650ffc6efe396314c87 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 8 Mar 2025 01:45:59 +0100 Subject: [PATCH 28/71] net: openvswitch: remove misbehaving actions length check [ Upstream commit a1e64addf3ff9257b45b78bc7d743781c3f41340 ] The actions length check is unreliable and produces different results depending on the initial length of the provided netlink attribute and the composition of the actual actions inside of it. For example, a user can add 4088 empty clone() actions without triggering -EMSGSIZE, on attempt to add 4089 such actions the operation will fail with the -EMSGSIZE verdict. However, if another 16 KB of other actions will be *appended* to the previous 4089 clone() actions, the check passes and the flow is successfully installed into the openvswitch datapath. The reason for a such a weird behavior is the way memory is allocated. When ovs_flow_cmd_new() is invoked, it calls ovs_nla_copy_actions(), that in turn calls nla_alloc_flow_actions() with either the actual length of the user-provided actions or the MAX_ACTIONS_BUFSIZE. The function adds the size of the sw_flow_actions structure and then the actually allocated memory is rounded up to the closest power of two. So, if the user-provided actions are larger than MAX_ACTIONS_BUFSIZE, then MAX_ACTIONS_BUFSIZE + sizeof(*sfa) rounded up is 32K + 24 -> 64K. Later, while copying individual actions, we look at ksize(), which is 64K, so this way the MAX_ACTIONS_BUFSIZE check is not actually triggered and the user can easily allocate almost 64 KB of actions. However, when the initial size is less than MAX_ACTIONS_BUFSIZE, but the actions contain ones that require size increase while copying (such as clone() or sample()), then the limit check will be performed during the reserve_sfa_size() and the user will not be allowed to create actions that yield more than 32 KB internally. This is one part of the problem. The other part is that it's not actually possible for the userspace application to know beforehand if the particular set of actions will be rejected or not. Certain actions require more space in the internal representation, e.g. an empty clone() takes 4 bytes in the action list passed in by the user, but it takes 12 bytes in the internal representation due to an extra nested attribute, and some actions require less space in the internal representations, e.g. set(tunnel(..)) normally takes 64+ bytes in the action list provided by the user, but only needs to store a single pointer in the internal implementation, since all the data is stored in the tunnel_info structure instead. And the action size limit is applied to the internal representation, not to the action list passed by the user. So, it's not possible for the userpsace application to predict if the certain combination of actions will be rejected or not, because it is not possible for it to calculate how much space these actions will take in the internal representation without knowing kernel internals. All that is causing random failures in ovs-vswitchd in userspace and inability to handle certain traffic patterns as a result. For example, it is reported that adding a bit more than a 1100 VMs in an OpenStack setup breaks the network due to OVS not being able to handle ARP traffic anymore in some cases (it tries to install a proper datapath flow, but the kernel rejects it with -EMSGSIZE, even though the action list isn't actually that large.) Kernel behavior must be consistent and predictable in order for the userspace application to use it in a reasonable way. ovs-vswitchd has a mechanism to re-direct parts of the traffic and partially handle it in userspace if the required action list is oversized, but that doesn't work properly if we can't actually tell if the action list is oversized or not. Solution for this is to check the size of the user-provided actions instead of the internal representation. This commit just removes the check from the internal part because there is already an implicit size check imposed by the netlink protocol. The attribute can't be larger than 64 KB. Realistically, we could reduce the limit to 32 KB, but we'll be risking to break some existing setups that rely on the fact that it's possible to create nearly 64 KB action lists today. Vast majority of flows in real setups are below 100-ish bytes. So removal of the limit will not change real memory consumption on the system. The absolutely worst case scenario is if someone adds a flow with 64 KB of empty clone() actions. That will yield a 192 KB in the internal representation consuming 256 KB block of memory. However, that list of actions is not meaningful and also a no-op. Real world very large action lists (that can occur for a rare cases of BUM traffic handling) are unlikely to contain a large number of clones and will likely have a lot of tunnel attributes making the internal representation comparable in size to the original action list. So, it should be fine to just remove the limit. Commit in the 'Fixes' tag is the first one that introduced the difference between internal representation and the user-provided action lists, but there were many more afterwards that lead to the situation we have today. Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.") Signed-off-by: Ilya Maximets Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20250308004609.2881861-1-i.maximets@ovn.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/openvswitch/flow_netlink.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index e3c85ceb1f0a..5ebbec656895 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2302,14 +2302,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) OVS_FLOW_ATTR_MASK, true, skb); } -#define MAX_ACTIONS_BUFSIZE (32 * 1024) - static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); @@ -2465,15 +2461,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size exceeds max %u", - MAX_ACTIONS_BUFSIZE); - return ERR_PTR(-EMSGSIZE); - } - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -3492,7 +3479,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int err; u32 mpls_label_count = 0; - *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); + *sfa = nla_alloc_flow_actions(nla_len(attr)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); From 86ff45f5f61ae1d0d17f0f6d8797b052eacfd8f1 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 11 Mar 2025 00:01:43 +0200 Subject: [PATCH 29/71] net/mlx5: Bridge, fix the crash caused by LAG state check [ Upstream commit 4b8eeed4fb105770ce6dc84a2c6ef953c7b71cbb ] When removing LAG device from bridge, NETDEV_CHANGEUPPER event is triggered. Driver finds the lower devices (PFs) to flush all the offloaded entries. And mlx5_lag_is_shared_fdb is checked, it returns false if one of PF is unloaded. In such case, mlx5_esw_bridge_lag_rep_get() and its caller return NULL, instead of the alive PF, and the flush is skipped. Besides, the bridge fdb entry's lastuse is updated in mlx5 bridge event handler. But this SWITCHDEV_FDB_ADD_TO_BRIDGE event can be ignored in this case because the upper interface for bond is deleted, and the entry will never be aged because lastuse is never updated. To make things worse, as the entry is alive, mlx5 bridge workqueue keeps sending that event, which is then handled by kernel bridge notifier. It causes the following crash when accessing the passed bond netdev which is already destroyed. To fix this issue, remove such checks. LAG state is already checked in commit 15f8f168952f ("net/mlx5: Bridge, verify LAG state when adding bond to bridge"), driver still need to skip offload if LAG becomes invalid state after initialization. Oops: stack segment: 0000 [#1] SMP CPU: 3 UID: 0 PID: 23695 Comm: kworker/u40:3 Tainted: G OE 6.11.0_mlnx #1 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_bridge_wq mlx5_esw_bridge_update_work [mlx5_core] RIP: 0010:br_switchdev_event+0x2c/0x110 [bridge] Code: 44 00 00 48 8b 02 48 f7 00 00 02 00 00 74 69 41 54 55 53 48 83 ec 08 48 8b a8 08 01 00 00 48 85 ed 74 4a 48 83 fe 02 48 89 d3 <4c> 8b 65 00 74 23 76 49 48 83 fe 05 74 7e 48 83 fe 06 75 2f 0f b7 RSP: 0018:ffffc900092cfda0 EFLAGS: 00010297 RAX: ffff888123bfe000 RBX: ffffc900092cfe08 RCX: 00000000ffffffff RDX: ffffc900092cfe08 RSI: 0000000000000001 RDI: ffffffffa0c585f0 RBP: 6669746f6e690a30 R08: 0000000000000000 R09: ffff888123ae92c8 R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888123ae9c60 R13: 0000000000000001 R14: ffffc900092cfe08 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f15914c8734 CR3: 0000000002830005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __die_body+0x1a/0x60 ? die+0x38/0x60 ? do_trap+0x10b/0x120 ? do_error_trap+0x64/0xa0 ? exc_stack_segment+0x33/0x50 ? asm_exc_stack_segment+0x22/0x30 ? br_switchdev_event+0x2c/0x110 [bridge] ? sched_balance_newidle.isra.149+0x248/0x390 notifier_call_chain+0x4b/0xa0 atomic_notifier_call_chain+0x16/0x20 mlx5_esw_bridge_update+0xec/0x170 [mlx5_core] mlx5_esw_bridge_update_work+0x19/0x40 [mlx5_core] process_scheduled_works+0x81/0x390 worker_thread+0x106/0x250 ? bh_worker+0x110/0x110 kthread+0xb7/0xe0 ? kthread_park+0x80/0x80 ret_from_fork+0x2d/0x50 ? kthread_park+0x80/0x80 ret_from_fork_asm+0x11/0x20 Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Jianbo Liu Reviewed-by: Vlad Buslov Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-6-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index ce85b48d327d..6748c92941b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) struct list_head *iter; netdev_for_each_lower_dev(dev, lower, iter) { - struct mlx5_core_dev *mdev; - struct mlx5e_priv *priv; - if (!mlx5e_eswitch_rep(lower)) continue; - priv = netdev_priv(lower); - mdev = priv->mdev; - if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + if (mlx5_esw_bridge_dev_same_esw(lower, esw)) return lower; } @@ -121,7 +116,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * priv = netdev_priv(rep); mdev = priv->mdev; if (netif_is_lag_master(dev)) - return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return mlx5_lag_is_master(mdev); return true; } @@ -436,6 +431,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, if (!rep) return NOTIFY_DONE; + if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) + return NOTIFY_DONE; + switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = container_of(info, From 39e507d4f4337f950e568a5a568c774a6976bf93 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 11 Mar 2025 00:01:44 +0200 Subject: [PATCH 30/71] net/mlx5e: Prevent bridge link show failure for non-eswitch-allowed devices [ Upstream commit e92df790d07a8eea873efcb84776e7b71f81c7d5 ] mlx5_eswitch_get_vepa returns -EPERM if the device lacks eswitch_manager capability, blocking mlx5e_bridge_getlink from retrieving VEPA mode. Since mlx5e_bridge_getlink implements ndo_bridge_getlink, returning -EPERM causes bridge link show to fail instead of skipping devices without this capability. To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when mlx5_eswitch_get_vepa fails, ensuring the command continues processing other devices while ignoring those without the necessary capability. Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink") Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8ee6a81b42b4..f520949b2024 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4791,11 +4791,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u8 mode, setting; - int err; - err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); - if (err) - return err; + if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) + return -EOPNOTSUPP; mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, From db1daaca25ed7dfafab00f5cf90bb3e086cb4099 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:47 +0100 Subject: [PATCH 31/71] nvme-fc: go straight to connecting state when initializing [ Upstream commit d3d380eded7ee5fc2fc53b3b0e72365ded025c4a ] The initial controller initialization mimiks the reconnect loop behavior by switching from NEW to RESETTING and then to CONNECTING. The transition from NEW to CONNECTING is a valid transition, so there is no point entering the RESETTING state. TCP and RDMA also transition directly to CONNECTING state. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3dbf926fd99f..2b0f15de7711 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3525,8 +3525,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); goto fail_ctrl; From 86f653f37bd744b450dab2e79c9c1712f6520a7f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Jan 2025 18:07:45 +0200 Subject: [PATCH 32/71] hrtimers: Mark is_migration_base() with __always_inline [ Upstream commit 27af31e44949fa85550176520ef7086a0d00fd7b ] When is_migration_base() is unused, it prevents kernel builds with clang, `make W=1` and CONFIG_WERROR=y: kernel/time/hrtimer.c:156:20: error: unused function 'is_migration_base' [-Werror,-Wunused-function] 156 | static inline bool is_migration_base(struct hrtimer_clock_base *base) | ^~~~~~~~~~~~~~~~~ Fix this by marking it with __always_inline. [ tglx: Use __always_inline instead of __maybe_unused and move it into the usage sites conditional ] Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250116160745.243358-1-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin --- kernel/time/hrtimer.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f6d799646dd9..e60863ab74b5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -145,11 +145,6 @@ static struct hrtimer_cpu_base migration_cpu_base = { #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are @@ -274,11 +269,6 @@ again: #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { @@ -1378,6 +1368,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was From e4beb8aa3508c72156e01ba348d26c733746b52a Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Fri, 10 Jan 2025 10:05:54 +0900 Subject: [PATCH 33/71] powercap: call put_device() on an error path in powercap_register_control_type() [ Upstream commit 93c66fbc280747ea700bd6199633d661e3c819b3 ] powercap_register_control_type() calls device_register(), but does not release the refcount of the device when it fails. Call put_device() before returning an error to balance the refcount. Since the kfree(control_type) will be done by powercap_release(), remove the lines in powercap_register_control_type() before returning the error. This bug was found by an experimental verifier that I am developing. Signed-off-by: Joe Hattori Link: https://patch.msgid.link/20250110010554.1583411-1-joe@pf.is.s.u-tokyo.ac.jp [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/powercap_sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index ff736b006198..fd475e463d1f 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -626,8 +626,7 @@ struct powercap_control_type *powercap_register_control_type( dev_set_name(&control_type->dev, "%s", name); result = device_register(&control_type->dev); if (result) { - if (control_type->allocated) - kfree(control_type); + put_device(&control_type->dev); return ERR_PTR(result); } idr_init(&control_type->idr); From 9bfa80c8aa4e06dff55a953c3fffbfc68a3a3b1c Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Tue, 14 Jan 2025 12:12:34 +0800 Subject: [PATCH 34/71] iscsi_ibft: Fix UBSAN shift-out-of-bounds warning in ibft_attr_show_nic() [ Upstream commit 07e0d99a2f701123ad3104c0f1a1e66bce74d6e5 ] When performing an iSCSI boot using IPv6, iscsistart still reads the /sys/firmware/ibft/ethernetX/subnet-mask entry. Since the IPv6 prefix length is 64, this causes the shift exponent to become negative, triggering a UBSAN warning. As the concept of a subnet mask does not apply to IPv6, the value is set to ~0 to suppress the warning message. Signed-off-by: Chengen Du Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin --- drivers/firmware/iscsi_ibft.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 6e9788324fea..371f24569b3b 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -310,7 +310,10 @@ static ssize_t ibft_attr_show_nic(void *data, int type, char *buf) str += sprintf_ipaddr(str, nic->ip_addr); break; case ISCSI_BOOT_ETH_SUBNET_MASK: - val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); + if (nic->subnet_mask_prefix > 32) + val = cpu_to_be32(~0); + else + val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); str += sprintf(str, "%pI4", &val); break; case ISCSI_BOOT_ETH_PREFIX_LEN: From 6d816086d78e1d7f4d73795f545c841046e6223c Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 28 Jan 2025 16:35:39 -0500 Subject: [PATCH 35/71] scsi: core: Use GFP_NOIO to avoid circular locking dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5363ee9d110e139584c2d92a0b640bc210588506 ] Filesystems can write to disk from page reclaim with __GFP_FS set. Marc found a case where scsi_realloc_sdev_budget_map() ends up in page reclaim with GFP_KERNEL, where it could try to take filesystem locks again, leading to a deadlock. WARNING: possible circular locking dependency detected 6.13.0 #1 Not tainted ------------------------------------------------------ kswapd0/70 is trying to acquire lock: ffff8881025d5d78 (&q->q_usage_counter(io)){++++}-{0:0}, at: blk_mq_submit_bio+0x461/0x6e0 but task is already holding lock: ffffffff81ef5f40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x9f/0x760 The full lockdep splat can be found in Marc's report: https://lkml.org/lkml/2025/1/24/1101 Avoid the potential deadlock by doing the allocation with GFP_NOIO, which prevents both filesystem and block layer recursion. Reported-by: Marc Aurèle La France Signed-off-by: Rik van Riel Link: https://lore.kernel.org/r/20250129104525.0ae8421e@fangorn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 852d509b19b2..69288303e600 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -245,7 +245,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, } ret = sbitmap_init_node(&sdev->budget_map, scsi_device_max_queue_depth(sdev), - new_shift, GFP_KERNEL, + new_shift, GFP_NOIO, sdev->request_queue->node, false, true); if (!ret) sbitmap_resize(&sdev->budget_map, depth); From 24602e2664c515a4f2950d7b52c3d5997463418c Mon Sep 17 00:00:00 2001 From: Magnus Lindholm Date: Sat, 25 Jan 2025 10:49:22 +0100 Subject: [PATCH 36/71] scsi: qla1280: Fix kernel oops when debug level > 2 [ Upstream commit 5233e3235dec3065ccc632729675575dbe3c6b8a ] A null dereference or oops exception will eventually occur when qla1280.c driver is compiled with DEBUG_QLA1280 enabled and ql_debug_level > 2. I think its clear from the code that the intention here is sg_dma_len(s) not length of sg_next(s) when printing the debug info. Signed-off-by: Magnus Lindholm Link: https://lore.kernel.org/r/20250125095033.26188-1-linmag7@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla1280.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 1e7f4d138e06..0bb80d135f56 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2866,7 +2866,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n", cpu_to_le32(upper_32_bits(dma_handle)), cpu_to_le32(lower_32_bits(dma_handle)), - cpu_to_le32(sg_dma_len(sg_next(s)))); + cpu_to_le32(sg_dma_len(s))); remseg--; } dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather " From ae45fe47ccfe8b3053092de45840d0dea3cd30d4 Mon Sep 17 00:00:00 2001 From: Gannon Kolding Date: Mon, 27 Jan 2025 02:39:02 -0700 Subject: [PATCH 37/71] ACPI: resource: IRQ override for Eluktronics MECH-17 [ Upstream commit 607ab6f85f4194b644ea95ac5fe660ef575db3b4 ] The Eluktronics MECH-17 (GM7RG7N) needs IRQ overriding for the keyboard to work. Adding a DMI_MATCH entry for this laptop model makes the internal keyboard function normally. Signed-off-by: Gannon Kolding Link: https://patch.msgid.link/20250127093902.328361-1-gannon.kolding@gmail.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 34cb7894e54e..d4fb1436f9f5 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -553,6 +553,12 @@ static const struct dmi_system_id maingear_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "RP-15"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Eluktronics Inc."), + DMI_MATCH(DMI_BOARD_NAME, "MECH-17"), + }, + }, { /* TongFang GM6XGxX/TUXEDO Stellaris 16 Gen5 AMD */ .matches = { From 6bbed0b3ad8b23ff0e584671aa7e22165e558f62 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 5 Feb 2025 13:22:11 -0300 Subject: [PATCH 38/71] smb: client: fix noisy when tree connecting to DFS interlink targets [ Upstream commit 773dc23ff81838b6f74d7fabba5a441cc6a93982 ] When the client attempts to tree connect to a domain-based DFS namespace from a DFS interlink target, the server will return STATUS_BAD_NETWORK_NAME and the following will appear on dmesg: CIFS: VFS: BAD_NETWORK_NAME: \\dom\dfs Since a DFS share might contain several DFS interlinks and they expire after 10 minutes, the above message might end up being flooded on dmesg when mounting or accessing them. Print this only once per share. Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 217d381eb9fe..0ae931e023ce 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1999,7 +1999,7 @@ tcon_exit: tcon_error_exit: if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) - cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); + cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree); goto tcon_exit; } From 936041b69a3bb3428390fa22b94d7b713bf9ec47 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 12 Jan 2025 23:39:01 -0600 Subject: [PATCH 39/71] alpha/elf: Fix misc/setarch test of util-linux by removing 32bit support [ Upstream commit b029628be267cba3c7684ec684749fe3e4372398 ] Richard Henderson writes[1]: > There was a Spec benchmark (I forget which) which was memory bound and ran > twice as fast with 32-bit pointers. > > I copied the idea from DEC to the ELF abi, but never did all the other work > to allow the toolchain to take advantage. > > Amusingly, a later Spec changed the benchmark data sets to not fit into a > 32-bit address space, specifically because of this. > > I expect one could delete the ELF bit and personality and no one would > notice. Not even the 10 remaining Alpha users. In [2] it was pointed out that parts of setarch weren't working properly on alpha because it has it's own SET_PERSONALITY implementation. In the discussion that followed Richard Henderson pointed out that the 32bit pointer support for alpha was never completed. Fix this by removing alpha's 32bit pointer support. As a bit of paranoia refuse to execute any alpha binaries that have the EF_ALPHA_32BIT flag set. Just in case someone somewhere has binaries that try to use alpha's 32bit pointer support. Link: https://lkml.kernel.org/r/CAFXwXrkgu=4Qn-v1PjnOR4SG0oUb9LSa0g6QXpBq4ttm52pJOQ@mail.gmail.com [1] Link: https://lkml.kernel.org/r/20250103140148.370368-1-glaubitz@physik.fu-berlin.de [2] Signed-off-by: Eric W. Biederman Reviewed-by: Richard Henderson Reviewed-by: Arnd Bergmann Reviewed-by: John Paul Adrian Glaubitz Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/87y0zfs26i.fsf_-_@email.froward.int.ebiederm.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- arch/alpha/include/asm/elf.h | 6 +----- arch/alpha/include/asm/pgtable.h | 2 +- arch/alpha/include/asm/processor.h | 8 ++------ arch/alpha/kernel/osf_sys.c | 11 ++--------- 4 files changed, 6 insertions(+), 21 deletions(-) diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 8049997fa372..2039a8c8d547 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) +#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT)) /* * These are used to set parameters in the core dumps. @@ -145,10 +145,6 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -#define SET_PERSONALITY(EX) \ - set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ - ? PER_LINUX_32BIT : PER_LINUX) - extern int alpha_l1i_cacheshape; extern int alpha_l1d_cacheshape; extern int alpha_l2_cacheshape; diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 9e45f6735d5d..6fe4e9deeb5e 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -322,7 +322,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern void paging_init(void); -/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +/* We have our own get_unmapped_area */ #define HAVE_ARCH_UNMAPPED_AREA #endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 714abe494e5f..916a2dd782c1 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -8,23 +8,19 @@ #ifndef __ASM_ALPHA_PROCESSOR_H #define __ASM_ALPHA_PROCESSOR_H -#include /* for ADDR_LIMIT_32BIT */ - /* * We have a 42-bit user address space: 4TB user VM... */ #define TASK_SIZE (0x40000000000UL) -#define STACK_TOP \ - (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) +#define STACK_TOP (0x00120000000UL) #define STACK_TOP_MAX 0x00120000000UL /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE \ - ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) +#define TASK_UNMAPPED_BASE (TASK_SIZE / 2) /* This is dead. Everything has been moved to thread_info. */ struct thread_struct { }; diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index c54469b369cb..03b70b6c9250 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1213,8 +1213,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) return ret; } -/* Get an address range which is currently unmapped. Similar to the - generic version except that we know how to honor ADDR_LIMIT_32BIT. */ +/* Get an address range which is currently unmapped. */ static unsigned long arch_get_unmapped_area_1(unsigned long addr, unsigned long len, @@ -1236,13 +1235,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - unsigned long limit; - - /* "32 bit" actually means 31 bit, since pointers sign extend. */ - if (current->personality & ADDR_LIMIT_32BIT) - limit = 0x80000000; - else - limit = TASK_SIZE; + unsigned long limit = TASK_SIZE; if (len > limit) return -ENOMEM; From 89811c6208d4bc6d56edb6db1e7dfef5a994ebb0 Mon Sep 17 00:00:00 2001 From: Brahmajit Das Date: Tue, 21 Jan 2025 21:56:48 +0530 Subject: [PATCH 40/71] vboxsf: fix building with GCC 15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4e7487245abcbc5a1a1aea54e4d3b33c53804bda ] Building with GCC 15 results in build error fs/vboxsf/super.c:24:54: error: initializer-string for array of ‘unsigned char’ is too long [-Werror=unterminated-string-initialization] 24 | static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; | ^~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Due to GCC having enabled -Werror=unterminated-string-initialization[0] by default. Separately initializing each array element of VBSF_MOUNT_SIGNATURE to ensure NUL termination, thus satisfying GCC 15 and fixing the build error. [0]: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wno-unterminated-string-initialization Signed-off-by: Brahmajit Das Link: https://lore.kernel.org/r/20250121162648.1408743-1-brahmajit.xyz@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/vboxsf/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 74952e58cca0..48f33d4994dc 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,7 +21,8 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', + '\375' }; static int follow_symlinks; module_param(follow_symlinks, int, 0444); From fc16b17906a6b90a546df8a7b469444476e531ca Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 22 Jan 2025 09:29:00 +0800 Subject: [PATCH 41/71] HID: intel-ish-hid: fix the length of MNG_SYNC_FW_CLOCK in doorbell [ Upstream commit 4b54ae69197b9f416baa0fceadff7e89075f8454 ] The timestamps in the Firmware log and HID sensor samples are incorrect. They show 1970-01-01 because the current IPC driver only uses the first 8 bytes of bootup time when synchronizing time with the firmware. The firmware converts the bootup time to UTC time, which results in the display of 1970-01-01. In write_ipc_from_queue(), when sending the MNG_SYNC_FW_CLOCK message, the clock is updated according to the definition of ipc_time_update_msg. However, in _ish_sync_fw_clock(), the message length is specified as the size of uint64_t when building the doorbell. As a result, the firmware only receives the first 8 bytes of struct ipc_time_update_msg. This patch corrects the length in the doorbell to ensure the entire ipc_time_update_msg is sent, fixing the timestamp issue. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ipc/ipc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index dd5fc60874ba..b1a41c90c574 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -577,14 +577,14 @@ static void fw_reset_work_fn(struct work_struct *unused) static void _ish_sync_fw_clock(struct ishtp_device *dev) { static unsigned long prev_sync; - uint64_t usec; + struct ipc_time_update_msg time = {}; if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ)) return; prev_sync = jiffies; - usec = ktime_to_us(ktime_get_boottime()); - ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t)); + /* The fields of time would be updated while sending message */ + ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time)); } /** From 3358a3dee6a44503226dfdd327118a09cbd28561 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 22 Jan 2025 09:29:01 +0800 Subject: [PATCH 42/71] HID: intel-ish-hid: Send clock sync message immediately after reset [ Upstream commit 7e0d1cff12b895f44f4ddc8cf50311bc1f775201 ] The ISH driver performs a clock sync with the firmware once at system startup and then every 20 seconds. If a firmware reset occurs right after a clock sync, the driver would wait 20 seconds before performing another clock sync with the firmware. This is particularly problematic with the introduction of the "load firmware from host" feature, where the driver performs a clock sync with the bootloader and then has to wait 20 seconds before syncing with the main firmware. This patch clears prev_sync immediately upon receiving an IPC reset, so that the main firmware and driver will perform a clock sync immediately after completing the IPC handshake. Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ipc/ipc.c | 9 ++++++--- drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index b1a41c90c574..42141a78bdb4 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -517,6 +517,10 @@ static int ish_fw_reset_handler(struct ishtp_device *dev) /* ISH FW is dead */ if (!ish_is_input_ready(dev)) return -EPIPE; + + /* Send clock sync at once after reset */ + ishtp_dev->prev_sync = 0; + /* * Set HOST2ISH.ILUP. Apparently we need this BEFORE sending * RESET_NOTIFY_ACK - FW will be checking for it @@ -576,13 +580,12 @@ static void fw_reset_work_fn(struct work_struct *unused) */ static void _ish_sync_fw_clock(struct ishtp_device *dev) { - static unsigned long prev_sync; struct ipc_time_update_msg time = {}; - if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ)) + if (dev->prev_sync && time_before(jiffies, dev->prev_sync + 20 * HZ)) return; - prev_sync = jiffies; + dev->prev_sync = jiffies; /* The fields of time would be updated while sending message */ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time)); } diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h index 32142c7d9a04..9b2ee3fe04b8 100644 --- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h +++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h @@ -212,6 +212,8 @@ struct ishtp_device { unsigned int ipc_tx_cnt; unsigned long long ipc_tx_bytes_cnt; + /* Time of the last clock sync */ + unsigned long prev_sync; const struct ishtp_hw_ops *ops; size_t mtu; uint32_t ishtp_msg_hdr; From 9acdb0059fb6b82158e15adae91e629cb5974564 Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 15 Jan 2025 15:00:20 +0800 Subject: [PATCH 43/71] HID: ignore non-functional sensor in HP 5MP Camera [ Upstream commit 363236d709e75610b628c2a4337ccbe42e454b6d ] The HP 5MP Camera (USB ID 0408:5473) reports a HID sensor interface that is not actually implemented. Attempting to access this non-functional sensor via iio_info causes system hangs as runtime PM tries to wake up an unresponsive sensor. [453] hid-sensor-hub 0003:0408:5473.0003: Report latency attributes: ffffffff:ffffffff [453] hid-sensor-hub 0003:0408:5473.0003: common attributes: 5:1, 2:1, 3:1 ffffffff:ffffffff Add this device to the HID ignore list since the sensor interface is non-functional by design and should not be exposed to userspace. Signed-off-by: Chia-Lin Kao (AceLan) Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d8c5e24e7d44..29b8c96ce9f4 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1072,6 +1072,7 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003 0x3003 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 +#define USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473 0x5473 #define I2C_VENDOR_ID_RAYDIUM 0x2386 #define I2C_PRODUCT_ID_RAYDIUM_4B33 0x4b33 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index debc49272a5c..875c44e5cf6c 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -882,6 +882,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DPAD) }, #endif { HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) }, { } }; From b3047f4c4a54716999dcbfce9f10f03594392858 Mon Sep 17 00:00:00 2001 From: Ievgen Vovk Date: Sun, 12 Jan 2025 13:13:14 +0900 Subject: [PATCH 44/71] HID: hid-apple: Apple Magic Keyboard a3203 USB-C support [ Upstream commit 2813e00dcd748cef47d2bffaa04071de93fddf00 ] Add Apple Magic Keyboard 2024 model (with USB-C port) device ID (0320) to those recognized by the hid-apple driver. Keyboard is otherwise compatible with the existing implementation for its earlier 2021 model. Signed-off-by: Ievgen Vovk Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-apple.c | 5 +++++ drivers/hid/hid-ids.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 37b2ce9b50fe..da8751601534 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -425,6 +425,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015) table = magic_keyboard_2015_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 || + hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021) table = apple2021_fn_keys; @@ -1030,6 +1031,10 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 29b8c96ce9f4..4187d890bcc1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -184,6 +184,7 @@ #define USB_DEVICE_ID_APPLE_IRCONTROL4 0x8242 #define USB_DEVICE_ID_APPLE_IRCONTROL5 0x8243 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 0x029c +#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 0x0320 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 0x029a #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021 0x029f #define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102 From 62a4c7ac84d5ff8dda506f0575f2184b9fdc7318 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Thu, 16 Jan 2025 23:12:17 -0700 Subject: [PATCH 45/71] HID: apple: fix up the F6 key on the Omoton KB066 keyboard [ Upstream commit 819083cb6eedcc8495cbf84845877bcc741b93b3 ] The Omoton KB066 is an Apple A1255 keyboard clone (HID product code 05ac:022c). On both keyboards, the F6 key becomes Num Lock when the Fn key is held. But unlike its Apple exemplar, when the Omoton's F6 key is pressed without Fn, it sends the usage code 0xC0301 from the reserved section of the consumer page instead of the standard F6 usage code 0x7003F from the keyboard page. The nonstandard code is translated to KEY_UNKNOWN and becomes useless on Linux. The Omoton KB066 is a pretty popular keyboard, judging from its 29,058 reviews on Amazon at time of writing, so let's account for its quirk to make it more usable. By the way, it would be nice if we could automatically set fnmode to 0 for Omoton keyboards because they handle the Fn key internally and the kernel's Fn key handling creates undesirable side effects such as making F1 and F2 always Brightness Up and Brightness Down in fnmode=1 (the default) or always F1 and F2 in fnmode=2. Unfortunately I don't think there's a way to identify Bluetooth keyboards more specifically than the HID product code which is obviously inaccurate. Users of Omoton keyboards will just have to set fnmode to 0 manually to get full Fn key functionality. Signed-off-by: Alex Henrie Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-apple.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index da8751601534..86a5e09544c4 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -497,6 +497,9 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } } + if (usage->hid == 0xc0301) /* Omoton KB066 quirk */ + code = KEY_F6; + if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); From 5932970c3f81caed445aedcd9218433c791dbe6d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 29 Jan 2025 20:53:03 +0100 Subject: [PATCH 46/71] sched: Clarify wake_up_q()'s write to task->wake_q.next [ Upstream commit bcc6244e13b4d4903511a1ea84368abf925031c0 ] Clarify that wake_up_q() does an atomic write to task->wake_q.next, after which a concurrent __wake_q_add() can immediately overwrite task->wake_q.next again. Signed-off-by: Jann Horn Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250129-sched-wakeup-prettier-v1-1-2f51f5f663fa@google.com Signed-off-by: Sasha Levin --- kernel/sched/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9be8a509b5f3..9b01fdceb622 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -997,9 +997,10 @@ void wake_up_q(struct wake_q_head *head) struct task_struct *task; task = container_of(node, struct task_struct, wake_q); - /* Task can safely be re-inserted now: */ node = node->next; - task->wake_q.next = NULL; + /* pairs with cmpxchg_relaxed() in __wake_q_add() */ + WRITE_ONCE(task->wake_q.next, NULL); + /* Task can safely be re-inserted now. */ /* * wake_up_process() executes a full barrier, which pairs with From 96850a2a90929e11f616c4732a358b3e59e40b9b Mon Sep 17 00:00:00 2001 From: Sybil Isabel Dorsett Date: Mon, 3 Feb 2025 16:33:15 +0000 Subject: [PATCH 47/71] platform/x86: thinkpad_acpi: Fix invalid fan speed on ThinkPad X120e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1046cac109225eda0973b898e053aeb3d6c10e1d ] On ThinkPad X120e, fan speed is reported in ticks per revolution rather than RPM. Recalculate the fan speed value reported for ThinkPad X120e to RPM based on a 22.5 kHz clock. Based on the information on https://www.thinkwiki.org/wiki/How_to_control_fan_speed, the same problem is highly likely to be relevant to at least Edge11, but Edge11 is not addressed in this patch. Signed-off-by: Sybil Isabel Dorsett Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250203163255.5525-1-sybdorsett@proton.me Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a57e236be050..a8b6f38d344f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8161,6 +8161,7 @@ static struct ibm_struct volume_driver_data = { #define FAN_NS_CTRL_STATUS BIT(2) /* Bit which determines control is enabled or not */ #define FAN_NS_CTRL BIT(4) /* Bit which determines control is by host or EC */ +#define FAN_CLOCK_TPM (22500*60) /* Ticks per minute for a 22.5 kHz clock */ enum { /* Fan control constants */ fan_status_offset = 0x2f, /* EC register 0x2f */ @@ -8214,6 +8215,7 @@ static int fan_watchdog_maxinterval; static bool fan_with_ns_addr; static bool ecfw_with_fan_dec_rpm; +static bool fan_speed_in_tpr; static struct mutex fan_mutex; @@ -8396,8 +8398,11 @@ static int fan_get_speed(unsigned int *speed) !acpi_ec_read(fan_rpm_offset + 1, &hi))) return -EIO; - if (likely(speed)) + if (likely(speed)) { *speed = (hi << 8) | lo; + if (fan_speed_in_tpr && *speed != 0) + *speed = FAN_CLOCK_TPM / *speed; + } break; case TPACPI_FAN_RD_TPEC_NS: if (!acpi_ec_read(fan_rpm_status_ns, &lo)) @@ -8430,8 +8435,11 @@ static int fan2_get_speed(unsigned int *speed) if (rc) return -EIO; - if (likely(speed)) + if (likely(speed)) { *speed = (hi << 8) | lo; + if (fan_speed_in_tpr && *speed != 0) + *speed = FAN_CLOCK_TPM / *speed; + } break; case TPACPI_FAN_RD_TPEC_NS: @@ -8959,6 +8967,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ #define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ +#define TPACPI_FAN_TPR 0x0040 /* Fan speed is in Ticks Per Revolution */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8981,6 +8990,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '2', 'U', TPACPI_FAN_NS), /* X13 Yoga Gen 2*/ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ + TPACPI_Q_LNV('8', 'F', TPACPI_FAN_TPR), /* ThinkPad x120e */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -9044,6 +9054,8 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); + if (quirks & TPACPI_FAN_TPR) + fan_speed_in_tpr = true; /* Try and probe the 2nd fan */ tp_features.second_fan = 1; /* needed for get_speed to work */ res = fan2_get_speed(&speed); From 4209d21f6f7a3d079fd5cd65683613d8f18d3592 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Thu, 6 Feb 2025 14:39:41 -0500 Subject: [PATCH 48/71] platform/x86: thinkpad_acpi: Support for V9 DYTC platform profiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9cff907cbf8c7fb5345918dbcc7b74a01656f34f ] Newer Thinkpad AMD platforms are using V9 DYTC and this changes the profiles used for PSC mode. Add support for this update. Tested on P14s G5 AMD Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20250206193953.58365-1-mpearson-lenovo@squebb.ca Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 34 ++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a8b6f38d344f..26ca9c453a59 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10485,6 +10485,10 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_MODE_PSC_BALANCE 5 /* Default mode aka balanced */ #define DYTC_MODE_PSC_PERFORM 7 /* High power mode aka performance */ +#define DYTC_MODE_PSCV9_LOWPOWER 1 /* Low power mode */ +#define DYTC_MODE_PSCV9_BALANCE 3 /* Default mode aka balanced */ +#define DYTC_MODE_PSCV9_PERFORM 4 /* High power mode aka performance */ + #define DYTC_ERR_MASK 0xF /* Bits 0-3 in cmd result are the error result */ #define DYTC_ERR_SUCCESS 1 /* CMD completed successful */ @@ -10505,6 +10509,10 @@ static int dytc_capabilities; static bool dytc_mmc_get_available; static int profile_force; +static int platform_psc_profile_lowpower = DYTC_MODE_PSC_LOWPOWER; +static int platform_psc_profile_balanced = DYTC_MODE_PSC_BALANCE; +static int platform_psc_profile_performance = DYTC_MODE_PSC_PERFORM; + static int convert_dytc_to_profile(int funcmode, int dytcmode, enum platform_profile_option *profile) { @@ -10526,19 +10534,15 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, } return 0; case DYTC_FUNCTION_PSC: - switch (dytcmode) { - case DYTC_MODE_PSC_LOWPOWER: + if (dytcmode == platform_psc_profile_lowpower) *profile = PLATFORM_PROFILE_LOW_POWER; - break; - case DYTC_MODE_PSC_BALANCE: + else if (dytcmode == platform_psc_profile_balanced) *profile = PLATFORM_PROFILE_BALANCED; - break; - case DYTC_MODE_PSC_PERFORM: + else if (dytcmode == platform_psc_profile_performance) *profile = PLATFORM_PROFILE_PERFORMANCE; - break; - default: /* Unknown mode */ + else return -EINVAL; - } + return 0; case DYTC_FUNCTION_AMT: /* For now return balanced. It's the closest we have to 'auto' */ @@ -10559,19 +10563,19 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_LOWPOWER; + *perfmode = platform_psc_profile_lowpower; break; case PLATFORM_PROFILE_BALANCED: if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_BALANCE; + *perfmode = platform_psc_profile_balanced; break; case PLATFORM_PROFILE_PERFORMANCE: if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_PERFORM; + *perfmode = platform_psc_profile_performance; break; default: /* Unknown profile */ return -EOPNOTSUPP; @@ -10760,6 +10764,7 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (output & BIT(DYTC_QUERY_ENABLE_BIT)) dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF; + dbg_printk(TPACPI_DBG_INIT, "DYTC version %d\n", dytc_version); /* Check DYTC is enabled and supports mode setting */ if (dytc_version < 5) return -ENODEV; @@ -10798,6 +10803,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) } } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ pr_debug("PSC is supported\n"); + if (dytc_version >= 9) { /* update profiles for DYTC 9 and up */ + platform_psc_profile_lowpower = DYTC_MODE_PSCV9_LOWPOWER; + platform_psc_profile_balanced = DYTC_MODE_PSCV9_BALANCE; + platform_psc_profile_performance = DYTC_MODE_PSCV9_PERFORM; + } } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; From 4d293411ad0b6a590f536d1e02d497333cb6629d Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Fri, 31 Jan 2025 12:02:55 +0100 Subject: [PATCH 49/71] s390/cio: Fix CHPID "configure" attribute caching [ Upstream commit 32ae4a2992529e2c7934e422035fad1d9b0f1fb5 ] In some environments, the SCLP firmware interface used to query a CHPID's configured state is not supported. On these environments, rapidly reading the corresponding sysfs attribute produces inconsistent results: $ cat /sys/devices/css0/chp0.00/configure cat: /sys/devices/css0/chp0.00/configure: Operation not supported $ cat /sys/devices/css0/chp0.00/configure 3 This occurs for example when Linux is run as a KVM guest. The inconsistency is a result of CIO using cached results for generating the value of the "configure" attribute while failing to handle the situation where no data was returned by SCLP. Fix this by not updating the cache-expiration timestamp when SCLP returns no data. With the fix applied, the system response is consistent: $ cat /sys/devices/css0/chp0.00/configure cat: /sys/devices/css0/chp0.00/configure: Operation not supported $ cat /sys/devices/css0/chp0.00/configure cat: /sys/devices/css0/chp0.00/configure: Operation not supported Reviewed-by: Vineeth Vijayan Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- drivers/s390/cio/chp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 5440f285f349..7e00c061538d 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -661,7 +661,8 @@ static int info_update(void) if (time_after(jiffies, chp_info_expires)) { /* Data is too old, update. */ rc = sclp_chp_read_info(&chp_info); - chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL ; + if (!rc) + chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL; } mutex_unlock(&info_lock); From 6c31c8761a1bfd36a8c18eb7e4816dd39e6665cb Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 11 Feb 2025 09:47:11 +0100 Subject: [PATCH 50/71] thermal/cpufreq_cooling: Remove structure member documentation [ Upstream commit a6768c4f92e152265590371975d44c071a5279c7 ] The structure member documentation refers to a member which does not exist any more. Remove it. Link: https://lore.kernel.org/all/202501220046.h3PMBCti-lkp@intel.com/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501220046.h3PMBCti-lkp@intel.com/ Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250211084712.2746705-1-daniel.lezcano@linaro.org [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/thermal/cpufreq_cooling.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 9f8b438fcf8f..bac0f52a361b 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -57,8 +57,6 @@ struct time_in_idle { * @max_level: maximum cooling level. One less than total number of valid * cpufreq frequencies. * @em: Reference on the Energy Model of the device - * @cdev: thermal_cooling_device pointer to keep track of the - * registered cooling device. * @policy: cpufreq policy. * @cooling_ops: cpufreq callbacks to thermal cooling device ops * @idle_time: idle time stats From 1a95cff6e15945e8bc7a4e4cec9f9b85f0fb08e7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 12 Feb 2025 16:14:38 +0100 Subject: [PATCH 51/71] Xen/swiotlb: mark xen_swiotlb_fixup() __init [ Upstream commit 75ad02318af2e4ae669e26a79f001bd5e1f97472 ] It's sole user (pci_xen_swiotlb_init()) is __init, too. Signed-off-by: Jan Beulich Reviewed-by: Stefano Stabellini Message-ID: Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0893c1012de6..fe52c8cbf136 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -112,7 +112,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) } #ifdef CONFIG_X86 -int xen_swiotlb_fixup(void *buf, unsigned long nslabs) +int __init xen_swiotlb_fixup(void *buf, unsigned long nslabs) { int rc; unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT); From ce0bdc1a748fc8fa5fbd5919f16019864b36adbe Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Sat, 1 Feb 2025 11:39:30 -0300 Subject: [PATCH 52/71] ALSA: hda/realtek: Limit mic boost on Positivo ARN50 [ Upstream commit 76b0a22d4cf7dc9091129560fdc04e73eb9db4cb ] The internal mic boost on the Positivo ARN50 is too high. Fix this by applying the ALC269_FIXUP_LIMIT_INT_MIC_BOOST fixup to the machine to limit the gain. Signed-off-by: Edson Juliano Drosdeck Link: https://patch.msgid.link/20250201143930.25089-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e5e222e74d78..102af43f7442 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10333,6 +10333,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x2014, 0x800a, "Positivo ARN50", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), From 1ffc9e94238ce1498b143eef6f4fb1d1b35743cf Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 5 Feb 2025 00:20:42 +0000 Subject: [PATCH 53/71] ASoC: rsnd: don't indicate warning on rsnd_kctrl_accept_runtime() [ Upstream commit c3fc002b206c6c83d1e3702b979733002ba6fb2c ] rsnd_kctrl_accept_runtime() (1) is used for runtime convert rate (= Synchronous SRC Mode). Now, rsnd driver has 2 kctrls for it (A): "SRC Out Rate Switch" (B): "SRC Out Rate" // it calls (1) (A): can be called anytime (B): can be called only runtime, and will indicate warning if it was used at non-runtime. To use runtime convert rate (= Synchronous SRC Mode), user might uses command in below order. (X): > amixer set "SRC Out Rate" on > aplay xxx.wav & (Y): > amixer set "SRC Out Rate" 48010 // convert rate to 48010Hz (Y): calls B (X): calls both A and B. In this case, when user calls (X), it calls both (A) and (B), but it is not yet start running. So, (B) will indicate warning. This warning was added by commit b5c088689847 ("ASoC: rsnd: add warning message to rsnd_kctrl_accept_runtime()"), but the message sounds like the operation was not correct. Let's update warning message. The message is very SRC specific, implement it in src.c Signed-off-by: Kuninori Morimoto Reviewed-by: Yoshihiro Shimoda Link: https://patch.msgid.link/8734gt2qed.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/core.c | 14 -------------- sound/soc/sh/rcar/rsnd.h | 1 - sound/soc/sh/rcar/src.c | 18 +++++++++++++++++- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 7e380d71b0f8..0964b4e3fbdf 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1694,20 +1694,6 @@ int rsnd_kctrl_accept_anytime(struct rsnd_dai_stream *io) return 1; } -int rsnd_kctrl_accept_runtime(struct rsnd_dai_stream *io) -{ - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - struct rsnd_priv *priv = rsnd_io_to_priv(io); - struct device *dev = rsnd_priv_to_dev(priv); - - if (!runtime) { - dev_warn(dev, "Can't update kctrl when idle\n"); - return 0; - } - - return 1; -} - struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg) { cfg->cfg.val = cfg->val; diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index f8ef6836ef84..690f4932357c 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -742,7 +742,6 @@ struct rsnd_kctrl_cfg_s { #define rsnd_kctrl_vals(x) ((x).val) /* = (x).cfg.val[0] */ int rsnd_kctrl_accept_anytime(struct rsnd_dai_stream *io); -int rsnd_kctrl_accept_runtime(struct rsnd_dai_stream *io); struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg); struct rsnd_kctrl_cfg *rsnd_kctrl_init_s(struct rsnd_kctrl_cfg_s *cfg); int rsnd_kctrl_new(struct rsnd_mod *mod, diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index f832165e46bc..9893839666d7 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -530,6 +530,22 @@ static irqreturn_t rsnd_src_interrupt(int irq, void *data) return IRQ_HANDLED; } +static int rsnd_src_kctrl_accept_runtime(struct rsnd_dai_stream *io) +{ + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + + if (!runtime) { + struct rsnd_priv *priv = rsnd_io_to_priv(io); + struct device *dev = rsnd_priv_to_dev(priv); + + dev_warn(dev, "\"SRC Out Rate\" can use during running\n"); + + return 0; + } + + return 1; +} + static int rsnd_src_probe_(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -593,7 +609,7 @@ static int rsnd_src_pcm_new(struct rsnd_mod *mod, rsnd_io_is_play(io) ? "SRC Out Rate" : "SRC In Rate", - rsnd_kctrl_accept_runtime, + rsnd_src_kctrl_accept_runtime, rsnd_src_set_convert_rate, &src->sync, 192000); From 17458c1193bbc3f4851503b7838d8b8940a85a7a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 5 Feb 2025 00:20:48 +0000 Subject: [PATCH 54/71] ASoC: rsnd: adjust convert rate limitation [ Upstream commit 89f9cf185885d4358aa92b48e51d0f09b71775aa ] Current rsnd driver supports Synchronous SRC Mode, but HW allow to update rate only within 1% from current rate. Adjust to it. Becially, this feature is used to fine-tune subtle difference that occur during sampling rate conversion in SRC. So, it should be called within 1% margin of rate difference. If there was difference over 1%, it will apply with 1% increments by using loop without indicating error message. Cc: Yoshihiro Shimoda Signed-off-by: Kuninori Morimoto Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/871pwd2qe8.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/src.c | 98 ++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 9893839666d7..e985681363e2 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -34,6 +34,7 @@ struct rsnd_src { struct rsnd_mod *dma; struct rsnd_kctrl_cfg_s sen; /* sync convert enable */ struct rsnd_kctrl_cfg_s sync; /* sync convert */ + u32 current_sync_rate; int irq; }; @@ -99,7 +100,7 @@ static u32 rsnd_src_convert_rate(struct rsnd_dai_stream *io, if (!rsnd_src_sync_is_enabled(mod)) return rsnd_io_converted_rate(io); - convert_rate = src->sync.val; + convert_rate = src->current_sync_rate; if (!convert_rate) convert_rate = rsnd_io_converted_rate(io); @@ -200,13 +201,73 @@ static const u32 chan222222[] = { static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, struct rsnd_mod *mod) { + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct rsnd_src *src = rsnd_mod_to_src(mod); + u32 fin, fout, new_rate; + int inc, cnt, rate; + u64 base, val; + + if (!runtime) + return; + + if (!rsnd_src_sync_is_enabled(mod)) + return; + + fin = rsnd_src_get_in_rate(priv, io); + fout = rsnd_src_get_out_rate(priv, io); + + new_rate = src->sync.val; + + if (!new_rate) + new_rate = fout; + + /* Do nothing if no diff */ + if (new_rate == src->current_sync_rate) + return; + + /* + * SRCm_IFSVR::INTIFS can change within 1% + * see + * SRCm_IFSVR::INTIFS Note + */ + inc = fout / 100; + cnt = abs(new_rate - fout) / inc; + if (fout > new_rate) + inc *= -1; + + /* + * After start running SRC, we can update only SRC_IFSVR + * for Synchronous Mode + */ + base = (u64)0x0400000 * fin; + rate = fout; + for (int i = 0; i < cnt; i++) { + val = base; + rate += inc; + do_div(val, rate); + + rsnd_mod_write(mod, SRC_IFSVR, val); + } + val = base; + do_div(val, new_rate); + + rsnd_mod_write(mod, SRC_IFSVR, val); + + /* update current_sync_rate */ + src->current_sync_rate = new_rate; +} + +static void rsnd_src_init_convert_rate(struct rsnd_dai_stream *io, + struct rsnd_mod *mod) +{ + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); struct rsnd_priv *priv = rsnd_mod_to_priv(mod); struct device *dev = rsnd_priv_to_dev(priv); - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); int is_play = rsnd_io_is_play(io); int use_src = 0; u32 fin, fout; - u32 ifscr, fsrate, adinr; + u32 ifscr, adinr; u32 cr, route; u32 i_busif, o_busif, tmp; const u32 *bsdsr_table; @@ -244,26 +305,15 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, adinr = rsnd_get_adinr_bit(mod, io) | chan; /* - * SRC_IFSCR / SRC_IFSVR - */ - ifscr = 0; - fsrate = 0; - if (use_src) { - u64 n; - - ifscr = 1; - n = (u64)0x0400000 * fin; - do_div(n, fout); - fsrate = n; - } - - /* + * SRC_IFSCR * SRC_SRCCR / SRC_ROUTE_MODE0 */ + ifscr = 0; cr = 0x00011110; route = 0x0; if (use_src) { route = 0x1; + ifscr = 0x1; if (rsnd_src_sync_is_enabled(mod)) { cr |= 0x1; @@ -334,7 +384,6 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_mod_write(mod, SRC_SRCIR, 1); /* initialize */ rsnd_mod_write(mod, SRC_ADINR, adinr); rsnd_mod_write(mod, SRC_IFSCR, ifscr); - rsnd_mod_write(mod, SRC_IFSVR, fsrate); rsnd_mod_write(mod, SRC_SRCCR, cr); rsnd_mod_write(mod, SRC_BSDSR, bsdsr_table[idx]); rsnd_mod_write(mod, SRC_BSISR, bsisr_table[idx]); @@ -347,6 +396,9 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_adg_set_src_timesel_gen2(mod, io, fin, fout); + /* update SRC_IFSVR */ + rsnd_src_set_convert_rate(io, mod); + return; convert_rate_err: @@ -466,7 +518,8 @@ static int rsnd_src_init(struct rsnd_mod *mod, int ret; /* reset sync convert_rate */ - src->sync.val = 0; + src->sync.val = + src->current_sync_rate = 0; ret = rsnd_mod_power_on(mod); if (ret < 0) @@ -474,7 +527,7 @@ static int rsnd_src_init(struct rsnd_mod *mod, rsnd_src_activation(mod); - rsnd_src_set_convert_rate(io, mod); + rsnd_src_init_convert_rate(io, mod); rsnd_src_status_clear(mod); @@ -492,7 +545,8 @@ static int rsnd_src_quit(struct rsnd_mod *mod, rsnd_mod_power_off(mod); /* reset sync convert_rate */ - src->sync.val = 0; + src->sync.val = + src->current_sync_rate = 0; return 0; } @@ -600,7 +654,7 @@ static int rsnd_src_pcm_new(struct rsnd_mod *mod, "SRC Out Rate Switch" : "SRC In Rate Switch", rsnd_kctrl_accept_anytime, - rsnd_src_set_convert_rate, + rsnd_src_init_convert_rate, &src->sen, 1); if (ret < 0) return ret; From 5d2ca607ad64f1e1f929be77f69fd9f51ee7c75f Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Wed, 5 Feb 2025 16:08:46 +0000 Subject: [PATCH 55/71] ASoC: arizona/madera: use fsleep() in up/down DAPM event delays. [ Upstream commit 679074942c2502a95842a80471d8fb718165ac77 ] Using `fsleep` instead of `msleep` resolves some customer complaints regarding the precision of up/down DAPM event timing. `fsleep()` automatically selects the appropriate sleep function, making the delay time more predictable. Signed-off-by: Vitaly Rodionov Link: https://patch.msgid.link/20250205160849.500306-1-vitalyr@opensource.cirrus.com Reviewed-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/arizona.c | 14 +++++++------- sound/soc/codecs/madera.c | 10 +++++----- sound/soc/codecs/wm5110.c | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 7434aeeda292..7a74941c608f 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -967,7 +967,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case ARIZONA_OUT3L_ENA_SHIFT: case ARIZONA_OUT3R_ENA_SHIFT: priv->out_up_pending++; - priv->out_up_delay += 17; + priv->out_up_delay += 17000; break; case ARIZONA_OUT4L_ENA_SHIFT: case ARIZONA_OUT4R_ENA_SHIFT: @@ -977,7 +977,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case WM8997: break; default: - priv->out_up_delay += 10; + priv->out_up_delay += 10000; break; } break; @@ -999,7 +999,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, if (!priv->out_up_pending && priv->out_up_delay) { dev_dbg(component->dev, "Power up delay: %d\n", priv->out_up_delay); - msleep(priv->out_up_delay); + fsleep(priv->out_up_delay); priv->out_up_delay = 0; } break; @@ -1017,7 +1017,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case ARIZONA_OUT3L_ENA_SHIFT: case ARIZONA_OUT3R_ENA_SHIFT: priv->out_down_pending++; - priv->out_down_delay++; + priv->out_down_delay += 1000; break; case ARIZONA_OUT4L_ENA_SHIFT: case ARIZONA_OUT4R_ENA_SHIFT: @@ -1028,10 +1028,10 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, break; case WM8998: case WM1814: - priv->out_down_delay += 5; + priv->out_down_delay += 5000; break; default: - priv->out_down_delay++; + priv->out_down_delay += 1000; break; } break; @@ -1053,7 +1053,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, if (!priv->out_down_pending && priv->out_down_delay) { dev_dbg(component->dev, "Power down delay: %d\n", priv->out_down_delay); - msleep(priv->out_down_delay); + fsleep(priv->out_down_delay); priv->out_down_delay = 0; } break; diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index b9f19fbd2911..30e680ee1069 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -2322,10 +2322,10 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case CS42L92: case CS47L92: case CS47L93: - out_up_delay = 6; + out_up_delay = 6000; break; default: - out_up_delay = 17; + out_up_delay = 17000; break; } @@ -2356,7 +2356,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3R_ENA_SHIFT: priv->out_up_pending--; if (!priv->out_up_pending) { - msleep(priv->out_up_delay); + fsleep(priv->out_up_delay); priv->out_up_delay = 0; } break; @@ -2375,7 +2375,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3L_ENA_SHIFT: case MADERA_OUT3R_ENA_SHIFT: priv->out_down_pending++; - priv->out_down_delay++; + priv->out_down_delay += 1000; break; default: break; @@ -2392,7 +2392,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3R_ENA_SHIFT: priv->out_down_pending--; if (!priv->out_down_pending) { - msleep(priv->out_down_delay); + fsleep(priv->out_down_delay); priv->out_down_delay = 0; } break; diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index e0b971620d0f..6db17349484c 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -302,7 +302,7 @@ static int wm5110_hp_pre_enable(struct snd_soc_dapm_widget *w) } else { wseq = wm5110_no_dre_left_enable; nregs = ARRAY_SIZE(wm5110_no_dre_left_enable); - priv->out_up_delay += 10; + priv->out_up_delay += 10000; } break; case ARIZONA_OUT1R_ENA_SHIFT: @@ -312,7 +312,7 @@ static int wm5110_hp_pre_enable(struct snd_soc_dapm_widget *w) } else { wseq = wm5110_no_dre_right_enable; nregs = ARRAY_SIZE(wm5110_no_dre_right_enable); - priv->out_up_delay += 10; + priv->out_up_delay += 10000; } break; default: @@ -338,7 +338,7 @@ static int wm5110_hp_pre_disable(struct snd_soc_dapm_widget *w) snd_soc_component_update_bits(component, ARIZONA_SPARE_TRIGGERS, ARIZONA_WS_TRG1, 0); - priv->out_down_delay += 27; + priv->out_down_delay += 27000; } break; case ARIZONA_OUT1R_ENA_SHIFT: @@ -350,7 +350,7 @@ static int wm5110_hp_pre_disable(struct snd_soc_dapm_widget *w) snd_soc_component_update_bits(component, ARIZONA_SPARE_TRIGGERS, ARIZONA_WS_TRG2, 0); - priv->out_down_delay += 27; + priv->out_down_delay += 27000; } break; default: From e6607c7008d8abe4ae25e52be6cd0a2bd2ca1897 Mon Sep 17 00:00:00 2001 From: Terry Cheong Date: Thu, 6 Feb 2025 11:47:23 +0200 Subject: [PATCH 56/71] ASoC: SOF: Intel: hda: add softdep pre to snd-hda-codec-hdmi module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 33b7dc7843dbdc9b90c91d11ba30b107f9138ffd ] In enviornment without KMOD requesting module may fail to load snd-hda-codec-hdmi, resulting in HDMI audio not usable. Add softdep to loading HDMI codec module first to ensure we can load it correctly. Signed-off-by: Terry Cheong Reviewed-by: Bard Liao Reviewed-by: Johny Lin Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20250206094723.18013-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/intel/hda-codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index a0dfd7de431f..a75f81116643 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -282,6 +282,7 @@ int hda_codec_i915_exit(struct snd_sof_dev *sdev) } EXPORT_SYMBOL_NS(hda_codec_i915_exit, SND_SOC_SOF_HDA_AUDIO_CODEC_I915); +MODULE_SOFTDEP("pre: snd-hda-codec-hdmi"); #endif MODULE_LICENSE("Dual BSD/GPL"); From d81ee62948b61a5e737aadc6ddacc148a5f1a2bf Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 12:15:35 +0100 Subject: [PATCH 57/71] net: wwan: mhi_wwan_mbim: Silence sequence number glitch errors [ Upstream commit 0d1fac6d26aff5df21bb4ec980d9b7a11c410b96 ] When using the Qualcomm X55 modem on the ThinkPad X13s, the kernel log is constantly being filled with errors related to a "sequence number glitch", e.g.: [ 1903.284538] sequence number glitch prev=16 curr=0 [ 1913.812205] sequence number glitch prev=50 curr=0 [ 1923.698219] sequence number glitch prev=142 curr=0 [ 2029.248276] sequence number glitch prev=1555 curr=0 [ 2046.333059] sequence number glitch prev=70 curr=0 [ 2076.520067] sequence number glitch prev=272 curr=0 [ 2158.704202] sequence number glitch prev=2655 curr=0 [ 2218.530776] sequence number glitch prev=2349 curr=0 [ 2225.579092] sequence number glitch prev=6 curr=0 Internet connectivity is working fine, so this error seems harmless. It looks like modem does not preserve the sequence number when entering low power state; the amount of errors depends on how actively the modem is being used. A similar issue has also been seen on USB-based MBIM modems [1]. However, in cdc_ncm.c the "sequence number glitch" message is a debug message instead of an error. Apply the same to the mhi_wwan_mbim.c driver to silence these errors when using the modem. [1]: https://lists.freedesktop.org/archives/libmbim-devel/2016-November/000781.html Signed-off-by: Stephan Gerhold Reviewed-by: Loic Poulain Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250212-mhi-wwan-mbim-sequence-glitch-v1-1-503735977cbd@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wwan/mhi_wwan_mbim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index ef70bb7c88ad..43c20deab318 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -209,7 +209,7 @@ static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *s if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) && !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { - net_err_ratelimited("sequence number glitch prev=%d curr=%d\n", + net_dbg_ratelimited("sequence number glitch prev=%d curr=%d\n", mbim->rx_seq, le16_to_cpu(nth16->wSequence)); } mbim->rx_seq = le16_to_cpu(nth16->wSequence); From f404cc4cde46e662af0767776f629c48f8436b9b Mon Sep 17 00:00:00 2001 From: Christopher Lentocha Date: Tue, 18 Feb 2025 08:59:29 -0500 Subject: [PATCH 58/71] nvme-pci: quirk Acer FA100 for non-uniqueue identifiers [ Upstream commit fcd875445866a5219cf2be3101e276b21fc843f3 ] In order for two Acer FA100 SSDs to work in one PC (in the case of myself, a Lenovo Legion T5 28IMB05), and not show one drive and not the other, and sometimes mix up what drive shows up (randomly), these two lines of code need to be added, and then both of the SSDs will show up and not conflict when booting off of one of them. If you boot up your computer with both SSDs installed without this patch, you may also randomly get into a kernel panic (if the initrd is not set up) or stuck in the initrd "/init" process, it is set up, however, if you do apply this patch, there should not be problems with booting or seeing both contents of the drive. Tested with the btrfs filesystem with a RAID configuration of having the root drive '/' combined to make two 256GB Acer FA100 SSDs become 512GB in total storage. Kernel Logs with patch applied (`dmesg -t | grep -i nvm`): ``` ... nvme 0000:04:00.0: platform quirk: setting simple suspend nvme nvme0: pci function 0000:04:00.0 nvme 0000:05:00.0: platform quirk: setting simple suspend nvme nvme1: pci function 0000:05:00.0 nvme nvme1: missing or invalid SUBNQN field. nvme nvme1: allocated 64 MiB host memory buffer. nvme nvme0: missing or invalid SUBNQN field. nvme nvme0: allocated 64 MiB host memory buffer. nvme nvme1: 8/0/0 default/read/poll queues nvme nvme1: Ignoring bogus Namespace Identifiers nvme nvme0: 8/0/0 default/read/poll queues nvme nvme0: Ignoring bogus Namespace Identifiers nvme0n1: p1 p2 ... ``` Kernel Logs with patch not applied (`dmesg -t | grep -i nvm`): ``` ... nvme 0000:04:00.0: platform quirk: setting simple suspend nvme nvme0: pci function 0000:04:00.0 nvme 0000:05:00.0: platform quirk: setting simple suspend nvme nvme1: pci function 0000:05:00.0 nvme nvme0: missing or invalid SUBNQN field. nvme nvme1: missing or invalid SUBNQN field. nvme nvme0: allocated 64 MiB host memory buffer. nvme nvme1: allocated 64 MiB host memory buffer. nvme nvme0: 8/0/0 default/read/poll queues nvme nvme1: 8/0/0 default/read/poll queues nvme nvme1: globally duplicate IDs for nsid 1 nvme nvme1: VID:DID 1dbe:5216 model:Acer SSD FA100 256GB firmware:1.Z.J.2X nvme0n1: p1 p2 ... ``` Signed-off-by: Christopher Lentocha Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f939b6dc295e..afcb9668dad9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3577,6 +3577,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1dbe, 0x5216), /* Acer/INNOGRIT FA100/5216 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e49, 0x0021), /* ZHITAI TiPro5000 NVMe SSD */ From 6eea8a5c1c649a8bbff0dc37522542885073bb5e Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 17 Feb 2025 17:08:27 +0100 Subject: [PATCH 59/71] nvme-tcp: add basic support for the C2HTermReq PDU [ Upstream commit 84e009042d0f3dfe91bec60bcd208ee3f866cbcd ] Previously, the NVMe/TCP host driver did not handle the C2HTermReq PDU, instead printing "unsupported pdu type (3)" when received. This patch adds support for processing the C2HTermReq PDU, allowing the driver to print the Fatal Error Status field. Example of output: nvme nvme4: Received C2HTermReq (FES = Invalid PDU Header Field) Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/tcp.c | 43 ++++++++++++++++++++++++++++++++++++++++ include/linux/nvme-tcp.h | 2 ++ 2 files changed, 45 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 29489c2c52fb..f7ef727ee44e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -694,6 +694,40 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, return 0; } +static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, + struct nvme_tcp_term_pdu *pdu) +{ + u16 fes; + const char *msg; + u32 plen = le32_to_cpu(pdu->hdr.plen); + + static const char * const msg_table[] = { + [NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field", + [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", + [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", + [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", + [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", + }; + + if (plen < NVME_TCP_MIN_C2HTERM_PLEN || + plen > NVME_TCP_MAX_C2HTERM_PLEN) { + dev_err(queue->ctrl->ctrl.device, + "Received a malformed C2HTermReq PDU (plen = %u)\n", + plen); + return; + } + + fes = le16_to_cpu(pdu->fes); + if (fes && fes < ARRAY_SIZE(msg_table)) + msg = msg_table[fes]; + else + msg = "Unknown"; + + dev_err(queue->ctrl->ctrl.device, + "Received C2HTermReq (FES = %s)\n", msg); +} + static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { @@ -715,6 +749,15 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { + /* + * C2HTermReq never includes Header or Data digests. + * Skip the checks. + */ + nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu); + return -EINVAL; + } + if (queue->hdr_digest) { ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); if (unlikely(ret)) diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 57ebe1267f7f..0278ce3ad1fb 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -13,6 +13,8 @@ #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 #define NVME_TCP_MIN_MAXH2CDATA 4096 +#define NVME_TCP_MIN_C2HTERM_PLEN 24 +#define NVME_TCP_MAX_C2HTERM_PLEN 152 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, From cd3f60e499bbca26cbab7441211e6bf2c98f7ad4 Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Sun, 16 Feb 2025 20:49:56 +0800 Subject: [PATCH 60/71] nvmet-rdma: recheck queue state is LIVE in state lock in recv done [ Upstream commit 3988ac1c67e6e84d2feb987d7b36d5791174b3da ] The queue state checking in nvmet_rdma_recv_done is not in queue state lock.Queue state can transfer to LIVE in cm establish handler between state checking and state lock here, cause a silent drop of nvme connect cmd. Recheck queue state whether in LIVE state in state lock to prevent this issue. Signed-off-by: Ruozhu Li Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/rdma.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index a6d55ebb8238..298c46834a53 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -997,6 +997,27 @@ out_err: nvmet_req_complete(&cmd->req, status); } +static bool nvmet_rdma_recv_not_live(struct nvmet_rdma_queue *queue, + struct nvmet_rdma_rsp *rsp) +{ + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&queue->state_lock, flags); + /* + * recheck queue state is not live to prevent a race condition + * with RDMA_CM_EVENT_ESTABLISHED handler. + */ + if (queue->state == NVMET_RDMA_Q_LIVE) + ret = false; + else if (queue->state == NVMET_RDMA_Q_CONNECTING) + list_add_tail(&rsp->wait_list, &queue->rsp_wait_list); + else + nvmet_rdma_put_rsp(rsp); + spin_unlock_irqrestore(&queue->state_lock, flags); + return ret; +} + static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_cmd *cmd = @@ -1038,17 +1059,9 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) rsp->req.port = queue->port; rsp->n_rdma = 0; - if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { - unsigned long flags; - - spin_lock_irqsave(&queue->state_lock, flags); - if (queue->state == NVMET_RDMA_Q_CONNECTING) - list_add_tail(&rsp->wait_list, &queue->rsp_wait_list); - else - nvmet_rdma_put_rsp(rsp); - spin_unlock_irqrestore(&queue->state_lock, flags); + if (unlikely(queue->state != NVMET_RDMA_Q_LIVE) && + nvmet_rdma_recv_not_live(queue, rsp)) return; - } nvmet_rdma_handle_command(queue, rsp); } From 638ffdc4ad425f80f8c103fa40c0749e0d3c2ab8 Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Tue, 18 Feb 2025 16:12:16 +0800 Subject: [PATCH 61/71] sctp: Fix undefined behavior in left shift operation [ Upstream commit 606572eb22c1786a3957d24307f5760bb058ca19 ] According to the C11 standard (ISO/IEC 9899:2011, 6.5.7): "If E1 has a signed type and E1 x 2^E2 is not representable in the result type, the behavior is undefined." Shifting 1 << 31 causes signed integer overflow, which leads to undefined behavior. Fix this by explicitly using '1U << 31' to ensure the shift operates on an unsigned type, avoiding undefined behavior. Signed-off-by: Yu-Chun Lin Link: https://patch.msgid.link/20250218081217.3468369-1-eleanor15x@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ee6514af830f..0527728aee98 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -735,7 +735,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( * value SHOULD be the smallest TSN not acknowledged by the * receiver of the request plus 2^31. */ - init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31); + init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); From 8c6715b24ae3f91491908c0ad259fd6caba0326c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 14 Feb 2025 09:02:03 +0100 Subject: [PATCH 62/71] nvme: only allow entering LIVE from CONNECTING state [ Upstream commit d2fe192348f93fe3a0cb1e33e4aba58e646397f4 ] The fabric transports and also the PCI transport are not entering the LIVE state from NEW or RESETTING. This makes the state machine more restrictive and allows to catch not supported state transitions, e.g. directly switching from RESETTING to LIVE. Reviewed-by: Sagi Grimberg Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ba76cd3b5f85..6a636fe6506b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -501,8 +501,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (new_state) { case NVME_CTRL_LIVE: switch (old_state) { - case NVME_CTRL_NEW: - case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; fallthrough; From 55132107faddf9ff2a6453eea9e9c40bd1b5d2e3 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sat, 8 Feb 2025 00:54:35 +0000 Subject: [PATCH 63/71] ASoC: tas2770: Fix volume scale [ Upstream commit 579cd64b9df8a60284ec3422be919c362de40e41 ] The scale starts at -100dB, not -128dB. Signed-off-by: Hector Martin Signed-off-by: Mark Brown Link: https://patch.msgid.link/20250208-asoc-tas2770-v1-1-cf50ff1d59a3@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2770.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 8557759acb1f..e284a3a85459 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -508,7 +508,7 @@ static int tas2770_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2770_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2770_playback_volume, -12750, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2770_playback_volume, -10050, 50, 0); static const struct snd_kcontrol_new tas2770_snd_controls[] = { SOC_SINGLE_TLV("Speaker Playback Volume", TAS2770_PLAY_CFG_REG2, From 12566097c9337c72c1cde2ce0ade85316335ddea Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 18 Feb 2025 18:35:35 +1000 Subject: [PATCH 64/71] ASoC: tas2764: Fix power control mask [ Upstream commit a3f172359e22b2c11b750d23560481a55bf86af1 ] Reviewed-by: Neal Gompa Signed-off-by: Hector Martin Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250218-apple-codec-changes-v2-1-932760fd7e07@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 168af772a898..d13ecae9c9c2 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -25,7 +25,7 @@ /* Power Control */ #define TAS2764_PWR_CTRL TAS2764_REG(0X0, 0x02) -#define TAS2764_PWR_CTRL_MASK GENMASK(1, 0) +#define TAS2764_PWR_CTRL_MASK GENMASK(2, 0) #define TAS2764_PWR_CTRL_ACTIVE 0x0 #define TAS2764_PWR_CTRL_MUTE BIT(0) #define TAS2764_PWR_CTRL_SHUTDOWN BIT(1) From 5c5194a09608d45fe0d4e75218f061df938b4080 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 18 Feb 2025 18:36:02 +1000 Subject: [PATCH 65/71] ASoC: tas2764: Set the SDOUT polarity correctly [ Upstream commit f5468beeab1b1adfc63c2717b1f29ef3f49a5fab ] TX launch polarity needs to be the opposite of RX capture polarity, to generate the right bit slot alignment. Reviewed-by: Neal Gompa Signed-off-by: Hector Martin Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250218-apple-codec-changes-v2-28-932760fd7e07@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 10 +++++++++- sound/soc/codecs/tas2764.h | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 2e0ed3e68fa5..fc8479d3d285 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -367,7 +367,7 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0, asi_cfg_4 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { @@ -376,12 +376,14 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; + asi_cfg_4 = TAS2764_TDM_CFG4_TX_FALLING; break; case SND_SOC_DAIFMT_IB_IF: asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; + asi_cfg_4 = TAS2764_TDM_CFG4_TX_RISING; break; } @@ -391,6 +393,12 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (ret < 0) return ret; + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG4, + TAS2764_TDM_CFG4_TX_MASK, + asi_cfg_4); + if (ret < 0) + return ret; + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index d13ecae9c9c2..9490f2686e38 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -79,6 +79,12 @@ #define TAS2764_TDM_CFG3_RXS_SHIFT 0x4 #define TAS2764_TDM_CFG3_MASK GENMASK(3, 0) +/* TDM Configuration Reg4 */ +#define TAS2764_TDM_CFG4 TAS2764_REG(0X0, 0x0d) +#define TAS2764_TDM_CFG4_TX_MASK BIT(0) +#define TAS2764_TDM_CFG4_TX_RISING 0x0 +#define TAS2764_TDM_CFG4_TX_FALLING BIT(0) + /* TDM Configuration Reg5 */ #define TAS2764_TDM_CFG5 TAS2764_REG(0X0, 0x0e) #define TAS2764_TDM_CFG5_VSNS_MASK BIT(6) From 3cb53dd5572916f7f267c0baedef251b9844ad5c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Feb 2025 11:02:58 +0100 Subject: [PATCH 66/71] fuse: don't truncate cached, mutated symlink [ Upstream commit b4c173dfbb6c78568578ff18f9e8822d7bd0e31b ] Fuse allows the value of a symlink to change and this property is exploited by some filesystems (e.g. CVMFS). It has been observed, that sometimes after changing the symlink contents, the value is truncated to the old size. This is caused by fuse_getattr() racing with fuse_reverse_inval_inode(). fuse_reverse_inval_inode() updates the fuse_inode's attr_version, which results in fuse_change_attributes() exiting before updating the cached attributes This is okay, as the cached attributes remain invalid and the next call to fuse_change_attributes() will likely update the inode with the correct values. The reason this causes problems is that cached symlinks will be returned through page_get_link(), which truncates the symlink to inode->i_size. This is correct for filesystems that don't mutate symlinks, but in this case it causes bad behavior. The solution is to just remove this truncation. This can cause a regression in a filesystem that relies on supplying a symlink larger than the file size, but this is unlikely. If that happens we'd need to make this behavior conditional. Reported-by: Laura Promberger Tested-by: Sam Lewis Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20250220100258.793363-1-mszeredi@redhat.com Reviewed-by: Bernd Schubert Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/fuse/dir.c | 2 +- fs/namei.c | 24 +++++++++++++++++++----- include/linux/fs.h | 2 ++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index aa2be4c1ea8f..de31cb8eb720 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1445,7 +1445,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, goto out_err; if (fc->cache_symlinks) - return page_get_link(dentry, inode, callback); + return page_get_link_raw(dentry, inode, callback); err = -ECHILD; if (!dentry) diff --git a/fs/namei.c b/fs/namei.c index 166d71c82d7a..6ce07cde1c27 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -5156,10 +5156,9 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ -const char *page_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) +static char *__page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) { - char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; @@ -5178,8 +5177,23 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode, } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - kaddr = page_address(page); - nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); + return page_address(page); +} + +const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + return __page_get_link(dentry, inode, callback); +} +EXPORT_SYMBOL_GPL(page_get_link_raw); + +const char *page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + char *kaddr = __page_get_link(dentry, inode, callback); + + if (!IS_ERR(kaddr)) + nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 0d32634c5cf0..08fba309ddc7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3385,6 +3385,8 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); +extern const char *page_get_link_raw(struct dentry *, struct inode *, + struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); From 9bd4fa7b52d4a9697f161b48a7e975633e86723a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 17 Jan 2025 07:19:13 -0800 Subject: [PATCH 67/71] perf/x86/intel: Use better start period for frequency mode [ Upstream commit a26b24b2e21f6222635a95426b9ef9eec63d69b1 ] Freqency mode is the current default mode of Linux perf. A period of 1 is used as a starting period. The period is auto-adjusted on each tick or an overflow, to meet the frequency target. The start period of 1 is too low and may trigger some issues: - Many HWs do not support period 1 well. https://lore.kernel.org/lkml/875xs2oh69.ffs@tglx/ - For an event that occurs frequently, period 1 is too far away from the real period. Lots of samples are generated at the beginning. The distribution of samples may not be even. - A low starting period for frequently occurring events also challenges virtualization, which has a longer path to handle a PMI. The limit_period value only checks the minimum acceptable value for HW. It cannot be used to set the start period, because some events may need a very low period. The limit_period cannot be set too high. It doesn't help with the events that occur frequently. It's hard to find a universal starting period for all events. The idea implemented by this patch is to only give an estimate for the popular HW and HW cache events. For the rest of the events, start from the lowest possible recommended value. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250117151913.3043942-3-kan.liang@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/core.c | 85 ++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 117f86283183..b9dde3769412 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3852,6 +3852,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) return test_bit(idx, (unsigned long *)&intel_cap->capabilities); } +static u64 intel_pmu_freq_start_period(struct perf_event *event) +{ + int type = event->attr.type; + u64 config, factor; + s64 start; + + /* + * The 127 is the lowest possible recommended SAV (sample after value) + * for a 4000 freq (default freq), according to the event list JSON file. + * Also, assume the workload is idle 50% time. + */ + factor = 64 * 4000; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + goto end; + + /* + * The estimation of the start period in the freq mode is + * based on the below assumption. + * + * For a cycles or an instructions event, 1GHZ of the + * underlying platform, 1 IPC. The workload is idle 50% time. + * The start period = 1,000,000,000 * 1 / freq / 2. + * = 500,000,000 / freq + * + * Usually, the branch-related events occur less than the + * instructions event. According to the Intel event list JSON + * file, the SAV (sample after value) of a branch-related event + * is usually 1/4 of an instruction event. + * The start period of branch-related events = 125,000,000 / freq. + * + * The cache-related events occurs even less. The SAV is usually + * 1/20 of an instruction event. + * The start period of cache-related events = 25,000,000 / freq. + */ + config = event->attr.config & PERF_HW_EVENT_MASK; + if (type == PERF_TYPE_HARDWARE) { + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES: + case PERF_COUNT_HW_INSTRUCTIONS: + case PERF_COUNT_HW_BUS_CYCLES: + case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: + case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: + case PERF_COUNT_HW_REF_CPU_CYCLES: + factor = 500000000; + break; + case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: + case PERF_COUNT_HW_BRANCH_MISSES: + factor = 125000000; + break; + case PERF_COUNT_HW_CACHE_REFERENCES: + case PERF_COUNT_HW_CACHE_MISSES: + factor = 25000000; + break; + default: + goto end; + } + } + + if (type == PERF_TYPE_HW_CACHE) + factor = 25000000; +end: + /* + * Usually, a prime or a number with less factors (close to prime) + * is chosen as an SAV, which makes it less likely that the sampling + * period synchronizes with some periodic event in the workload. + * Minus 1 to make it at least avoiding values near power of twos + * for the default freq. + */ + start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; + + if (start > x86_pmu.max_period) + start = x86_pmu.max_period; + + if (x86_pmu.limit_period) + x86_pmu.limit_period(event, &start); + + return start; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3863,6 +3942,12 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.freq && event->attr.sample_freq) { + event->hw.sample_period = intel_pmu_freq_start_period(event); + event->hw.last_period = event->hw.sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + } + if (event->attr.precise_ip) { if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; From 452382b2737dcdaa969d3871e16d14de7402107c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Feb 2025 22:32:33 +0100 Subject: [PATCH 68/71] x86/irq: Define trace events conditionally [ Upstream commit 9de7695925d5d2d2085681ba935857246eb2817d ] When both of X86_LOCAL_APIC and X86_THERMAL_VECTOR are disabled, the irq tracing produces a W=1 build warning for the tracing definitions: In file included from include/trace/trace_events.h:27, from include/trace/define_trace.h:113, from arch/x86/include/asm/trace/irq_vectors.h:383, from arch/x86/kernel/irq.c:29: include/trace/stages/init.h:2:23: error: 'str__irq_vectors__trace_system_name' defined but not used [-Werror=unused-const-variable=] Make the tracepoints conditional on the same symbosl that guard their usage. Signed-off-by: Arnd Bergmann Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250225213236.3141752-1-arnd@kernel.org Signed-off-by: Sasha Levin --- arch/x86/kernel/irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 766ffe3ba313..439fdb3f5fdf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -23,8 +23,10 @@ #include #include +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include +#endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); From 3c6e077b2a2de28ea0e7e55434a8f5927eb1fc4d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 24 Feb 2025 19:11:52 +0100 Subject: [PATCH 69/71] mptcp: safety check before fallback [ Upstream commit db75a16813aabae3b78c06b1b99f5e314c1f55d3 ] Recently, some fallback have been initiated, while the connection was not supposed to fallback. Add a safety check with a warning to detect when an wrong attempt to fallback is being done. This should help detecting any future issues quicker. Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250224-net-mptcp-misc-fixes-v1-3-f550f636b435@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mptcp/protocol.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 77e727d81cc2..25c1cda5c1bc 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -981,6 +981,8 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } + if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) + return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } From 82be3cb72b16bab2bc5f864e69185bd0b57e5179 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 14 Jan 2025 10:57:25 +0100 Subject: [PATCH 70/71] drm/nouveau: Do not override forced connector status [ Upstream commit 01f1d77a2630e774ce33233c4e6723bca3ae9daa ] Keep user-forced connector status even if it cannot be programmed. Same behavior as for the rest of the drivers. Signed-off-by: Thomas Zimmermann Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20250114100214.195386-1-tzimmermann@suse.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_connector.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index bdf5262ebd35..5a30d115525a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -755,7 +755,6 @@ nouveau_connector_force(struct drm_connector *connector) if (!nv_encoder) { NV_ERROR(drm, "can't find encoder to force %s on!\n", connector->name); - connector->status = connector_status_disconnected; return; } From 8cc4da21a2d955d21bff3ff582d2225677dea3ce Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 28 Feb 2025 21:26:56 +0800 Subject: [PATCH 71/71] block: fix 'kmem_cache of name 'bio-108' already exists' [ Upstream commit b654f7a51ffb386131de42aa98ed831f8c126546 ] Device mapper bioset often has big bio_slab size, which can be more than 1000, then 8byte can't hold the slab name any more, cause the kmem_cache allocation warning of 'kmem_cache of name 'bio-108' already exists'. Fix the warning by extending bio_slab->name to 12 bytes, but fix output of /proc/slabinfo Reported-by: Guangwu Zhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250228132656.2838008-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 3318e0022fdf..a7323d567c79 100644 --- a/block/bio.c +++ b/block/bio.c @@ -73,7 +73,7 @@ struct bio_slab { struct kmem_cache *slab; unsigned int slab_ref; unsigned int slab_size; - char name[8]; + char name[12]; }; static DEFINE_MUTEX(bio_slab_lock); static DEFINE_XARRAY(bio_slabs);